fold_binary_loc assumed that if the type of the result wasn't a vector, the operands wouldn't be either. This isn't necessarily true for EQ_EXPR and NE_EXPR of vector masks, which can return a single scalar for the mask as a whole.
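For context, the pattern the new tests exercise looks like this once the VEC_BOOL macro is expanded by hand (shown here for uint8_t and ==, purely as an illustration rather than as part of the patch).  When the loop is vectorized for SVE, lhs and rhs become predicate masks, so the comparison in the "if" is the kind of EQ_EXPR/NE_EXPR on vector masks described above:

/* Hand expansion of VEC_BOOL (cmpeq, ==, uint8_t, uint8_t) from the new
   test; illustration only, not part of the patch.  */
#include <stdint.h>
#include <stdbool.h>

void __attribute__ ((noinline, noclone))
vec_bool_cmpeq_uint8_t_uint8_t (uint8_t *dst, uint8_t *src,
                                uint8_t start, uint8_t n, uint8_t mask)
{
  for (uint8_t i = 0; i < n; i++)
    {
      bool lhs = i >= start;            /* becomes one vector mask */
      bool rhs = (i & mask) != 0x3D;    /* becomes another vector mask */
      if (lhs == rhs)                   /* comparison of the two masks */
        dst[i] = src[i];
    }
}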
Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64-linux-gnu.
OK to install?

Richard


2017-11-06  Richard Sandiford  <richard.sandif...@linaro.org>
	    Alan Hayward  <alan.hayw...@arm.com>
	    David Sherwood  <david.sherw...@arm.com>

gcc/
	* fold-const.c (fold_binary_loc): Check the argument types rather
	than the result type when testing for a vector operation.

gcc/testsuite/
	* gcc.target/aarch64/sve_vec_bool_cmp_1.c: New test.
	* gcc.target/aarch64/sve_vec_bool_cmp_1_run.c: Likewise.

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	2017-11-06 15:26:35.441288554 +0000
+++ gcc/fold-const.c	2017-11-06 15:26:35.595423552 +0000
@@ -9392,7 +9392,7 @@ fold_binary_loc (location_t loc,
   if ((code == BIT_AND_EXPR || code == BIT_IOR_EXPR
        || code == EQ_EXPR || code == NE_EXPR)
-      && TREE_CODE (type) != VECTOR_TYPE
+      && TREE_CODE (TREE_TYPE (arg0)) != VECTOR_TYPE
       && ((truth_value_p (TREE_CODE (arg0))
	   && (truth_value_p (TREE_CODE (arg1))
	       || (TREE_CODE (arg1) == BIT_AND_EXPR
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#define VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)				\
+void __attribute__ ((noinline, noclone))				\
+vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (VARTYPE *dst, VARTYPE *src,	\
+					   INDUCTYPE start,		\
+					   INDUCTYPE n,			\
+					   INDUCTYPE mask)		\
+{									\
+  for (INDUCTYPE i = 0; i < n; i++)					\
+    {									\
+      bool lhs = i >= start;						\
+      bool rhs = (i & mask) != 0x3D;					\
+      if (lhs OP rhs)							\
+	dst[i] = src[i];						\
+    }									\
+}
+
+#define TEST_OP(T, NAME, OP)			\
+  T (NAME, OP, uint8_t, uint8_t)		\
+  T (NAME, OP, uint16_t, uint16_t)		\
+  T (NAME, OP, uint32_t, uint32_t)		\
+  T (NAME, OP, uint64_t, uint64_t)		\
+  T (NAME, OP, float, uint32_t)			\
+  T (NAME, OP, double, uint64_t)
+
+#define TEST_ALL(T)				\
+  TEST_OP (T, cmpeq, ==)			\
+  TEST_OP (T, cmpne, !=)
+
+TEST_ALL (VEC_BOOL)
+
+/* Both cmpne and cmpeq loops will contain an exclusive predicate or.  */
+/* { dg-final { scan-assembler-times {\teors?\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b, p[0-9]*\.b\n} 12 } } */
+/* cmpeq will also contain a predicate not operation.  */
+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]*\.b, p[0-7]/z, p[0-9]*\.b\n} 6 } } */
Index: gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c
===================================================================
--- /dev/null	2017-11-03 22:04:05.605699023 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve_vec_bool_cmp_1_run.c	2017-11-06 15:26:35.595423552 +0000
@@ -0,0 +1,37 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O3 -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_vec_bool_cmp_1.c"
+
+#define N 103
+
+#define TEST_VEC_BOOL(NAME, OP, VARTYPE, INDUCTYPE)		\
+{								\
+  INDUCTYPE i;							\
+  VARTYPE src[N];						\
+  VARTYPE dst[N];						\
+  for (i = 0; i < N; i++)					\
+    {								\
+      src[i] = i;						\
+      dst[i] = i * 2;						\
+      asm volatile ("" ::: "memory");				\
+    }								\
+  vec_bool_##NAME##_##VARTYPE##_##INDUCTYPE (dst, src, 13,	\
+					     97, 0xFF);		\
+  for (i = 0; i < 13; i++)					\
+    if (dst[i] != (VARTYPE) (0 OP 1 ? i : i * 2))		\
+      __builtin_abort ();					\
+  for (i = 13; i < 97; i++)					\
+    if (dst[i] != (VARTYPE) (1 OP (i != 0x3D) ? i : i * 2))	\
+      __builtin_abort ();					\
+  for (i = 97; i < N; i++)					\
+    if (dst[i] != (i * 2))					\
+      __builtin_abort ();					\
+}
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_VEC_BOOL)
+  return 0;
+}