https://gcc.gnu.org/g:ba7b5c185596e7ef0d8e2a647a638a87f02a6f01
commit ba7b5c185596e7ef0d8e2a647a638a87f02a6f01 Author: Pan Li <[email protected]> Date: Sat Aug 23 12:55:50 2025 +0800 RISC-V: Combine vec_duplicate + vmacc.vv to vmacc.vx on GR2VR cost This patch combines vec_duplicate + vmacc.vv into vmacc.vx, as shown in the example code below. The related pattern depends on the GR2VR cost of vec_duplicate: late-combine performs the combination if the GR2VR cost is zero, and rejects the combination if the GR2VR cost is greater than zero. Assume we have the example code below, with a GR2VR cost of 0. #define DEF_VX_TERNARY_CASE_0(T, OP_1, OP_2, NAME) \ void \ test_vx_ternary_##NAME##_##T##_case_0 (T * restrict vd, T * restrict vs2, \ T rs1, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ vd[i] = vd[i] OP_2 vs2[i] OP_1 rs1; \ } DEF_VX_TERNARY_CASE_0(int32_t, *, +, macc) Before this patch: 11 │ beq a3,zero,.L8 12 │ vsetvli a5,zero,e32,m1,ta,ma 13 │ vmv.v.x v2,a2 ... 16 │ .L3: 17 │ vsetvli a5,a3,e32,m1,ta,ma ... 22 │ vmacc.vv v1,v2,v3 ... 25 │ bne a3,zero,.L3 After this patch: 11 │ beq a3,zero,.L8 ... 14 │ .L3: 15 │ vsetvli a5,a3,e32,m1,ta,ma ... 20 │ vmacc.vx v1,a2,v3 ... 23 │ bne a3,zero,.L3 gcc/ChangeLog: * config/riscv/vector.md (@pred_mul_plus_vx_<mode>): Add new pattern to generate vmacc rtl. (*pred_macc_<mode>_scalar_undef): Ditto. * config/riscv/autovec-opt.md (*vmacc_vx_<mode>): Add new pattern to match the vmacc vx combine. 
Signed-off-by: Pan Li <[email protected]> (cherry picked from commit 25037a02716ad0da0f4987960b815dec1f014b1e) Diff: --- gcc/config/riscv/autovec-opt.md | 23 ++++++++++ gcc/config/riscv/vector.md | 96 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index ed0280fa987f..0e37a3bf390a 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1824,6 +1824,29 @@ } [(set_attr "type" "vimerge")]) +(define_insn_and_split "*vmacc_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (plus:V_VLSI + (mult:V_VLSI + (vec_duplicate:V_VLSI + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSI 2 "register_operand")) + (match_operand:V_VLSI 3 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(<MODE>mode)}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops); + + DONE; + } + [(set_attr "type" "vimuladd")]) + + ;; ============================================================================= ;; Combine vec_duplicate + op.vv to op.vf ;; Include diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index e6eee4d651e7..9b52a61169d1 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -5501,6 +5501,52 @@ "TARGET_VECTOR" {}) +(define_expand "@pred_mul_plus_vx_<mode>" + [(set (match_operand:V_VLSI_QHS 0 "register_operand") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + 
(vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSI_QHS 3 "register_operand")) + (match_operand:V_VLSI_QHS 4 "register_operand")) + (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))] + "TARGET_VECTOR" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_expand "@pred_mul_plus_vx_<mode>" + [(set (match_operand:V_VLSI_D 0 "register_operand") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSI_D 3 "register_operand")) + (match_operand:V_VLSI_D 4 "register_operand")) + (match_operand:V_VLSI_D 5 "vector_merge_operand")))] + "TARGET_VECTOR && TARGET_64BIT" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + (define_insn "*pred_madd<mode>_scalar" [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vr") (if_then_else:V_VLSI @@ -8850,6 +8896,56 @@ [(set_attr "type" "vssegt<order>x") (set_attr "mode" "<V32T:MODE>")]) +(define_insn "*pred_macc_<mode>_scalar_undef" + [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")) + (match_operand:V_VLSI_QHS 5 
"register_operand" " 0, 0")) + (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "@ + vmacc.vx\t%0,%z3,%4%p1 + vmacc.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pred_macc_<mode>_scalar_undef" + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")) + (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_D 2 "vector_undef_operand")))] + "TARGET_VECTOR && TARGET_64BIT" + "@ + vmacc.vx\t%0,%z3,%4%p1 + vmacc.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + (include "autovec.md") (include "autovec-opt.md") (include "sifive-vector.md")
