Hi,

This implements mask reductions by first counting the bits in the mask
(vcpop.m) and then comparing the resulting scalar against 0 or len.

Changes from v1:
 - Add tests (almost verbatim from aarch64).

Regtested on rv64gcv_zvl512b.

Regards
 Robin

gcc/ChangeLog:

        * config/riscv/autovec.md (reduc_sbool_and_scal_<mode>): New
        expander.
        (reduc_sbool_ior_scal_<mode>): Ditto.
        (reduc_sbool_xor_scal_<mode>): Ditto.
        * config/riscv/riscv-protos.h (expand_mask_reduction): Declare.
        * config/riscv/riscv-v.cc (expand_mask_reduction): New function.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c: New test.
        * gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c: New test.
---
 gcc/config/riscv/autovec.md                   | 31 +++++++++++
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   | 48 +++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-1.c    | 52 +++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-2.c    | 52 +++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-3.c    | 52 +++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-4.c    | 52 +++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-5.c    | 50 ++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-6.c    | 50 ++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-7.c    | 50 ++++++++++++++++++
 .../riscv/rvv/autovec/reduc/reduc-bool-8.c    | 50 ++++++++++++++++++
 11 files changed, 488 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index cec0113fca3..c694684328f 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2301,6 +2301,37 @@ (define_expand "reduc_xor_scal_<mode>"
   DONE;
 })
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Mask reductions
+;; -------------------------------------------------------------------------
+
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:VB_VLS 1 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::expand_mask_reduction (operands, AND);
+  DONE;
+})
+
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:VB_VLS 1 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::expand_mask_reduction (operands, IOR);
+  DONE;
+})
+
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:VB_VLS 1 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::expand_mask_reduction (operands, XOR);
+  DONE;
+})
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Tree reductions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a372779cf9f..d97773256c9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -664,6 +664,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
+void expand_mask_reduction (rtx *, rtx_code);
 void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5e30b77b4eb..f4a35fdf6ad 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4885,6 +4885,54 @@ expand_reduction (unsigned unspec, unsigned 
unspec_for_vl0_safe,
   emit_insn (gen_pred_extract_first (m1_mode, scalar_dest, m1_tmp2));
 }
 
+/* Expand mask reductions.  OPS are {dest, src} where DEST's mode
+   is QImode and SRC's mode is a mask mode.
+   CODE is one of AND, IOR, XOR.  */
+
+void
+expand_mask_reduction (rtx *ops, rtx_code code)
+{
+  machine_mode mode = GET_MODE (ops[1]);
+  rtx dest = ops[0];
+  gcc_assert (GET_MODE (dest) == QImode);
+
+  rtx tmp = gen_reg_rtx (Xmode);
+  rtx cpop_ops[] = {tmp, ops[1]};
+  emit_vlmax_insn (code_for_pred_popcount (mode, Xmode), CPOP_OP, cpop_ops);
+
+  bool eq_zero = false;
+
+  /* AND reduction is popcount (mask) == len,
+     IOR reduction is popcount (mask) != 0,
+     XOR reduction is popcount (mask) & 1 != 0.  */
+  if (code == AND)
+    {
+      rtx len = gen_int_mode (GET_MODE_NUNITS (mode), HImode);
+      tmp = expand_binop (Xmode, sub_optab, tmp, len, NULL, true,
+                         OPTAB_DIRECT);
+      eq_zero = true;
+    }
+  else if (code == IOR)
+    ;
+  else if (code == XOR)
+    tmp = expand_binop (Xmode, and_optab, tmp, GEN_INT (1), NULL, true,
+                       OPTAB_DIRECT);
+  else
+    gcc_unreachable ();
+
+  rtx els = gen_label_rtx ();
+  rtx end = gen_label_rtx ();
+
+  riscv_expand_conditional_branch (els, eq_zero ? EQ : NE, tmp, const0_rtx);
+  emit_move_insn (dest, const0_rtx);
+  emit_jump_insn (gen_jump (end));
+  emit_barrier ();
+
+  emit_label (els);
+  emit_move_insn (dest, const1_rtx);
+  emit_label (end);
+}
+
 /* Prepare ops for ternary operations.
    It can be called before or after RA.  */
 void
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
new file mode 100644
index 00000000000..d4d540126ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
new file mode 100644
index 00000000000..3256206b5fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
new file mode 100644
index 00000000000..4303cf3846c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
new file mode 100644
index 00000000000..bf85ca7eb45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-4.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
new file mode 100644
index 00000000000..3ed5a6c6799
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-5.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
new file mode 100644
index 00000000000..6f0de436413
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-6.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
new file mode 100644
index 00000000000..233d5b67c3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-7.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c
new file mode 100644
index 00000000000..80a27e0811c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-bool-8.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-additional-options "-mrvv-vector-bits=scalable -fno-vect-cost-model 
-fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } 
*/
-- 
2.51.1


Reply via email to