[PATCH V2] RISC-V: Support combine cond extend and reduce sum to widen reduce sum

Lehua Ding Wed, 20 Sep 2023 00:58:13 -0700

V2 Change: Use a new method to keep the move of const 0 to a vector as a simple pattern.

This patch supports combining cond extend and reduce_sum into cond widen reduce_sum,
for example combining the following three insns:
  (set (reg:RVVM2HI 149)
       (const_vector:RVVM2HI repeat [
          (const_int 0)
       ]))
  (set (reg:RVVM2HI 138)
    (if_then_else:RVVM2HI
      (reg:RVVMF8BI 135)
      (reg:RVVM2HI 148)
      (reg:RVVM2HI 149)))
  (set (reg:HI 150)
    (unspec:HI [
      (reg:RVVM2HI 138)
    ] UNSPEC_REDUC_SUM))
into one insn:
  (set (reg:SI 147)
    (unspec:SI [
      (if_then_else:RVVM2SI
        (reg:RVVMF16BI 135)
        (sign_extend:RVVM2SI (reg:RVVM1HI 136))
        (const_vector:RVVM2SI repeat [
          (const_int 0)
        ]))
    ] UNSPEC_REDUC_SUM))


Consider the following C code:

int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}

assembly before this patch:

foo:
        vsetivli        zero,16,e16,m2,ta,ma
        li      a5,0
        vmv.v.i v2,0
        vsetvli zero,zero,e8,m1,ta,ma
        vl1re8.v        v0,0(a1)
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,mu
        vle8.v  v4,0(a0),v0.t
        vmv.s.x v1,a5
        vsext.vf2       v2,v4,v0.t
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

assembly after this patch:

foo:
        li      a5,0
        vsetivli        zero,16,e16,m1,ta,ma
        vmv.s.x v3,a5
        vsetivli        zero,16,e8,m1,ta,ma
        vl1re8.v        v0,0(a1)
        vmsne.vi        v0,v0,0
        vle8.v  v2,0(a0),v0.t
        vwredsum.vs     v1,v2,v3,v0.t
        vsetivli        zero,0,e16,m1,ta,ma
        vmv.x.s a0,v1
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

gcc/ChangeLog:

        * config/riscv/autovec-opt.md (@mov_vec_const_0<mode>):
        New helper pattern.
        (*cond_widen_reduc_plus_scal_<mode>): New combine pattern.
        * config/riscv/riscv-protos.h (enum insn_type): Add REDUCE_OP_M.
        * config/riscv/riscv-v.cc (expand_const_vector): Generate the new
        helper pattern when moving const 0 to a vector.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c: New test.
        * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c: New test.

---
 gcc/config/riscv/autovec-opt.md               | 64 +++++++++++++++++++
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   |  7 +-
 .../rvv/autovec/cond/cond_widen_reduc-1.c     | 30 +++++++++
 .../rvv/autovec/cond/cond_widen_reduc_run-1.c | 28 ++++++++
 5 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 66c77ad6ebb..5cc13c85fe5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -185,6 +185,22 @@
   [(set_attr "type" "vimovvx")
    (set_attr "mode" "<MODE>")])

+;; Keep the move of const 0 to a vector as a simple pattern until split1.
+;; This simple pattern allows more patterns to be combined successfully.
+(define_insn_and_split "@mov_vec_const_0<mode>"
+  [(set (match_operand:V_VLS 0 "register_operand")
+        (match_operand:V_VLS 1 "vector_const_0_operand"))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_mov (<MODE>mode),
+                                   riscv_vector::UNARY_OP, operands);
+    DONE;
+  }
+  [(set_attr "type" "vimov")])
+
 ;; 
=============================================================================
 ;; All combine patterns for combine pass.
 ;; 
=============================================================================
@@ -1175,6 +1191,54 @@
   }
   [(set_attr "type" "vfwmuladd")])

+;; Combine mask_extend + vredsum to mask_vwredsum[u]
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (match_operand:<VM> 1 "register_operand")
+            (any_extend:<V_DOUBLE_EXTEND>
+              (match_operand:VI_QHS_NO_M8 2 "register_operand"))
+            (match_operand:<V_DOUBLE_EXTEND> 3 "vector_const_0_operand"))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[2], operands[1],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine mask_extend + vfredsum to mask_vfwredusum
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (match_operand:<VM> 1 "register_operand")
+            (float_extend:<V_DOUBLE_EXTEND>
+              (match_operand:VF_HS_NO_M8 2 "register_operand"))
+            (match_operand:<V_DOUBLE_EXTEND> 3 "vector_const_0_operand"))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[2], operands[1],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; 
=============================================================================
 ;; Misc combine patterns
 ;; 
=============================================================================
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..a75b0b485b4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -337,6 +337,7 @@ enum insn_type : unsigned int

   /* For vreduce, no mask policy operand. */
   REDUCE_OP = __NORMAL_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
+  REDUCE_OP_M = __MASK_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
   REDUCE_OP_FRM_DYN = REDUCE_OP | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
   REDUCE_OP_M_FRM_DYN
   = __MASK_OP_TA | BINARY_OP_P | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 64a71a128d4..d2687969997 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -973,7 +973,12 @@ expand_const_vector (rtx target, rtx src)
       rtx tmp = register_operand (target, mode) ? target : gen_reg_rtx (mode);
       /* Element in range -16 ~ 15 integer or 0.0 floating-point,
         we use vmv.v.i instruction.  */
-      if (satisfies_constraint_vi (src) || satisfies_constraint_Wc0 (src))
+      /* For const int or float 0, we keep the simple pattern before split1
+        pass. */
+      if ((can_create_pseudo_p () && !lra_in_progress)
+         && satisfies_constraint_Wc0 (src))
+       emit_insn (gen_mov_vec_const_0 (mode, tmp, src));
+      else if (satisfies_constraint_vi (src))
        {
          rtx ops[] = {tmp, src};
          emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
new file mode 100644
index 00000000000..22a71048684
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param 
riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 
-fno-vect-cost-model -ffast-math" } */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2, N)                                             
\
+  __attribute__ ((noipa))                                                      
\
+  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      
\
+  {                                                                            
\
+    TYPE1 sum = 0;                                                             
\
+    for (int i = 0; i < N; i += 1)                                             
\
+      if (pred[i])                                                             
\
+       sum += a[i];                                                           \
+    return sum;                                                                
\
+  }
+
+#define TEST_ALL(TEST)                                                         
\
+  TEST (int16_t, int8_t, 16)                                                   
\
+  TEST (int32_t, int16_t, 8)                                                   
\
+  TEST (int64_t, int32_t, 4)                                                   
\
+  TEST (uint16_t, uint8_t, 16)                                                 
\
+  TEST (uint32_t, uint16_t, 8)                                                 
\
+  TEST (uint64_t, uint32_t, 4)                                                 
\
+  TEST (float, _Float16, 8)                                                    
\
+  TEST (double, float, 4)
+
+TEST_ALL (TEST_TYPE)
+
+/* { dg-final { scan-assembler-times 
{\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times 
{\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times 
{\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
new file mode 100644
index 00000000000..fdb7e5249ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable 
-fno-vect-cost-model" } */
+
+#include "cond_widen_reduc-1.c"
+
+#define RUN(TYPE1, TYPE2, N)                                                   
\
+  {                                                                            
\
+    TYPE2 a[N];                                                                
\
+    TYPE2 pred[N];                                                             
\
+    TYPE1 r = 0;                                                               
\
+    for (int i = 0; i < N; i++)                                                
\
+      {                                                                        
\
+       a[i] = (i * 0.1) * (i & 1 ? 1 : -1);                                   \
+       pred[i] = i % 3;                                                       \
+       if (pred[i])                                                           \
+         r += a[i];                                                           \
+       asm volatile ("" ::: "memory");                                        \
+      }                                                                        
\
+    if (r != reduc_##TYPE1##_##TYPE2 (a, pred))                                
\
+      __builtin_abort ();                                                      
\
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (RUN)
+  return 0;
+}
--
2.36.3

[PATCH V2] RISC-V: Support combine cond extend and reduce sum to widen reduce sum

Reply via email to