From: Pan Li <pan2...@intel.com>

This patch would like to combine the vec_duplicate + vmacc.vv to the
vmacc.vx.  From example as below code.  The related pattern will depend
on the cost of vec_duplicate from GR2VR.  Then the late-combine will
take action if the cost of GR2VR is zero, and reject the combination
if the GR2VR cost is greater than zero.

Assume we have example code like below, GR2VR cost is 0.

  #define DEF_VX_TERNARY_CASE_0(T, OP_1, OP_2, NAME)                        \
  void                                                                      \
  test_vx_ternary_##NAME##_##T##_case_0 (T * restrict vd, T * restrict vs2, \
                                         T rs1, unsigned n)                 \
  {                                                                         \
    for (unsigned i = 0; i < n; i++)                                        \
      vd[i] = vd[i] OP_2 vs2[i] OP_1 rs1;                                   \
  }

  DEF_VX_TERNARY_CASE_0(int32_t, *, +, macc)

Before this patch:
  11   │     beq a3,zero,.L8
  12   │     vsetvli a5,zero,e32,m1,ta,ma
  13   │     vmv.v.x v2,a2
  ...
  16   │ .L3:
  17   │     vsetvli a5,a3,e32,m1,ta,ma
  ...
  22   │     vmacc.vv v1,v2,v3
  ...
  25   │     bne a3,zero,.L3

After this patch:
  11   │     beq a3,zero,.L8
  ...
  14   │ .L3:
  15   │     vsetvli a5,a3,e32,m1,ta,ma
  ...
  20   │     vmacc.vx v1,a2,v3
  ...
  23   │     bne a3,zero,.L3

gcc/ChangeLog:

        * config/riscv/vector.md (@pred_mul_plus_vx_<mode>): Add new pattern to
        generate vmacc rtl.
        (*pred_macc_<mode>_scalar_undef): Ditto.
        * config/riscv/autovec-opt.md (*vmacc_vx_<mode>): Add new
        pattern to match the vmacc vx combine.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/config/riscv/autovec-opt.md | 23 ++++++++
 gcc/config/riscv/vector.md      | 96 +++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index ed0280fa987..0e37a3bf390 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1824,6 +1824,29 @@ (define_insn_and_split "*merge_vx_<mode>"
   }
   [(set_attr "type" "vimerge")])
 
+(define_insn_and_split "*vmacc_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+   (plus:V_VLSI
+    (mult:V_VLSI
+     (vec_duplicate:V_VLSI
+      (match_operand:<VEL>   1 "register_operand"))
+     (match_operand:V_VLSI   2 "register_operand"))
+    (match_operand:V_VLSI    3 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode);
+    rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+                RVV_VUNDEF(<MODE>mode)};
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+    DONE;
+  }
+  [(set_attr "type" "vimuladd")])
+
+
 ;; 
=============================================================================
 ;; Combine vec_duplicate + op.vv to op.vf
 ;; Include
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 66b76701f5a..c11d61a5cd2 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -5490,6 +5490,52 @@ (define_expand "@pred_mul_plus<mode>_scalar"
   "TARGET_VECTOR"
 {})
 
+(define_expand "@pred_mul_plus_vx_<mode>"
+  [(set (match_operand:V_VLSI_QHS       0 "register_operand")
+       (if_then_else:V_VLSI_QHS
+         (unspec:<VM>
+           [(match_operand:<VM>        1 "vector_mask_operand")
+            (match_operand             6 "vector_length_operand")
+            (match_operand             7 "const_int_operand")
+            (match_operand             8 "const_int_operand")
+            (match_operand             9 "const_int_operand")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+         (plus:V_VLSI_QHS
+           (mult:V_VLSI_QHS
+             (vec_duplicate:V_VLSI_QHS
+               (match_operand:<VEL>    2 "register_operand"))
+             (match_operand:V_VLSI_QHS 3 "register_operand"))
+           (match_operand:V_VLSI_QHS   4 "register_operand"))
+         (match_operand:V_VLSI_QHS     5 "vector_merge_operand")))]
+  "TARGET_VECTOR"
+{
+  riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_mul_plus_vx_<mode>"
+  [(set (match_operand:V_VLSI_D       0 "register_operand")
+       (if_then_else:V_VLSI_D
+         (unspec:<VM>
+           [(match_operand:<VM>      1 "vector_mask_operand")
+            (match_operand           6 "vector_length_operand")
+            (match_operand           7 "const_int_operand")
+            (match_operand           8 "const_int_operand")
+            (match_operand           9 "const_int_operand")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+         (plus:V_VLSI_D
+           (mult:V_VLSI_D
+             (vec_duplicate:V_VLSI_D
+               (match_operand:<VEL>  2 "register_operand"))
+             (match_operand:V_VLSI_D 3 "register_operand"))
+           (match_operand:V_VLSI_D   4 "register_operand"))
+         (match_operand:V_VLSI_D     5 "vector_merge_operand")))]
+  "TARGET_VECTOR && TARGET_64BIT"
+{
+  riscv_vector::prepare_ternary_operands (operands);
+})
+
 (define_insn "*pred_madd<mode>_scalar"
   [(set (match_operand:V_VLSI 0 "register_operand"            "=vd, vr")
        (if_then_else:V_VLSI
@@ -8839,6 +8885,56 @@ (define_insn 
"@pred_indexed_<order>store<V32T:mode><RATIO2I:mode>"
   [(set_attr "type" "vssegt<order>x")
    (set_attr "mode" "<V32T:MODE>")])
 
+(define_insn "*pred_macc_<mode>_scalar_undef"
+  [(set (match_operand:V_VLSI_QHS       0 "register_operand"      "=vd,  vr")
+       (if_then_else:V_VLSI_QHS
+         (unspec:<VM>
+           [(match_operand:<VM>        1 "vector_mask_operand"   " vm, Wc1")
+            (match_operand             6 "vector_length_operand" "rvl, rvl")
+            (match_operand             7 "const_int_operand"     "  i,   i")
+            (match_operand             8 "const_int_operand"     "  i,   i")
+            (match_operand             9 "const_int_operand"     "  i,   i")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+         (plus:V_VLSI_QHS
+           (mult:V_VLSI_QHS
+             (vec_duplicate:V_VLSI_QHS
+               (match_operand:<VEL>    3 "reg_or_0_operand"     "  rJ,  rJ"))
+             (match_operand:V_VLSI_QHS 4 "register_operand"     "  vr,  vr"))
+           (match_operand:V_VLSI_QHS   5 "register_operand"     "   0,   0"))
+         (match_operand:V_VLSI_QHS     2 "vector_undef_operand")))]
+  "TARGET_VECTOR"
+  "@
+   vmacc.vx\t%0,%z3,%4%p1
+   vmacc.vx\t%0,%z3,%4%p1"
+  [(set_attr "type" "vimuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_macc_<mode>_scalar_undef"
+  [(set (match_operand:V_VLSI_D       0 "register_operand"      "=vd,  vr")
+       (if_then_else:V_VLSI_D
+         (unspec:<VM>
+           [(match_operand:<VM>      1 "vector_mask_operand"   " vm, Wc1")
+            (match_operand           6 "vector_length_operand" "rvl, rvl")
+            (match_operand           7 "const_int_operand"     "  i,   i")
+            (match_operand           8 "const_int_operand"     "  i,   i")
+            (match_operand           9 "const_int_operand"     "  i,   i")
+            (reg:SI VL_REGNUM)
+            (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+         (plus:V_VLSI_D
+           (mult:V_VLSI_D
+             (vec_duplicate:V_VLSI_D
+               (match_operand:<VEL>  3 "reg_or_0_operand"     "  rJ,  rJ"))
+             (match_operand:V_VLSI_D 4 "register_operand"     "  vr,  vr"))
+           (match_operand:V_VLSI_D   5 "register_operand"     "   0,   0"))
+         (match_operand:V_VLSI_D     2 "vector_undef_operand")))]
+  "TARGET_VECTOR && TARGET_64BIT"
+  "@
+   vmacc.vx\t%0,%z3,%4%p1
+   vmacc.vx\t%0,%z3,%4%p1"
+  [(set_attr "type" "vimuladd")
+   (set_attr "mode" "<MODE>")])
+
 (include "autovec.md")
 (include "autovec-opt.md")
 (include "sifive-vector.md")
-- 
2.43.0

Reply via email to