https://gcc.gnu.org/g:4488152579989e8921e3b56e221fb96c1a6fdcd5
commit r16-3656-g4488152579989e8921e3b56e221fb96c1a6fdcd5 Author: Paul-Antoine Arras <par...@baylibre.com> Date: Wed Sep 3 14:29:13 2025 +0200 RISC-V: Add pattern for vector-scalar widening floating-point multiply This pattern enables the combine pass (or late-combine, depending on the case) to merge a float_extend'ed vec_duplicate into a mult RTL instruction. Before this patch, we have six instructions, e.g.: fcvt.d.s fa0,fa0 vsetvli a5,zero,e64,m1,ta,ma vfmv.v.f v3,fa0 vfwcvt.f.f.v v1,v2 vsetvli zero,zero,e64,m1,ta,ma vfmul.vv v1,v3,v1 After, we get only one: vfwmul.vf v1,v2,fa0 gcc/ChangeLog: * config/riscv/autovec-opt.md (*vfwmul_vf_<mode>): New pattern to combine float_extend + vec_duplicate + vfmul.vv into vfwmul.vf. * config/riscv/vector.md (*@pred_dual_widen_<optab><mode>_scalar): Swap operands to match the RTL emitted by expand, i.e. first float_extend then vec_duplicate. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwmul. * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise. * gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h: Add support for widening variants. * gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: New test helper. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c: New test. 
Diff: --- gcc/config/riscv/autovec-opt.md | 23 +++++++++++++++ gcc/config/riscv/vector.md | 4 +-- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c | 3 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c | 3 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h | 34 ++++++++++++++++++++-- .../riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h | 32 ++++++++++++++++++++ .../riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c | 20 +++++++++++++ .../riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c | 16 ++++++++++ 14 files changed, 139 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index d2a89a5d63b4..82a5fa0fae90 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -2146,3 +2146,26 @@ } [(set_attr "type" "vfminmax")] ) + +;; vfwmul.vf +(define_insn_and_split "*vfwmul_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (mult:VWEXTF + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand")))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MULT, + <MODE>mode), + riscv_vector::BINARY_OP_FRM_DYN, operands); + + DONE; + } + [(set_attr "type" "vfwmul")] +) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 95d44baf6fdd..d62f8bb2cd2e 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7250,8 +7250,8 @@ (any_widen_binop:VWEXTF (float_extend:VWEXTF (match_operand:<V_DOUBLE_TRUNC> 3 
"register_operand" " vr, vr")) - (float_extend:VWEXTF - (vec_duplicate:<V_DOUBLE_TRUNC> + (vec_duplicate:VWEXTF + (float_extend:<VEL> (match_operand:<VSUBEL> 4 "register_operand" " f, f")))) (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))] "TARGET_VECTOR" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c index 0be64f1fd646..cbec87e6c0b3 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c @@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_0_WRAP (_Float16), min) DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_1_WRAP (_Float16), min) DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max) DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max) +DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max) /* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */ /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */ +/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c index a9cd38aebeb1..b6d94c650b0c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c @@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_0_WRAP (float), min) DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_1_WRAP (float), min) DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max) DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max) +DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul) /* { dg-final { 
scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max) /* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */ /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */ +/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c index 0db3048688ce..3f31568825e4 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c @@ -19,4 +19,5 @@ /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */ /* { dg-final { scan-assembler-not {vfmin.vf} } } */ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ -/* { dg-final { scan-assembler-times {fcvt.s.h} 4 } } */ +/* { dg-final { scan-assembler-not {vfwmul.vf} } } */ +/* { dg-final { scan-assembler-times {fcvt.s.h} 5 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c index 494b33e45b29..21a3e1d16aa5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c @@ -19,4 +19,5 @@ /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */ /* { dg-final { scan-assembler-not {vfmin.vf} } } */ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ -/* { dg-final { scan-assembler-times {fcvt.d.s} 4 } } */ +/* { dg-final { scan-assembler-not {vfwmul.vf} } } */ +/* { dg-final { scan-assembler-times {fcvt.d.s} 5 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c index c2c4f430b154..522b5d294649 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c @@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max, VF_BINOP_FUNC_BODY_X128) DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max, VF_BINOP_FUNC_BODY_X128) +DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max, /* { dg-final { scan-assembler {vfrdiv.vf} } } */ /* { dg-final { scan-assembler {vfmin.vf} } } */ /* { dg-final { scan-assembler {vfmax.vf} } } */ +/* { dg-final { scan-assembler {vfwmul.vf} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c index f2582cad8b37..8d05c54c7729 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c @@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_0_WRAP (float), max, VF_BINOP_FUNC_BODY_X128) DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max, VF_BINOP_FUNC_BODY_X128) +DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max, /* { dg-final { scan-assembler {vfrdiv.vf} } } */ /* { dg-final { scan-assembler {vfmin.vf} } } */ /* { dg-final { scan-assembler {vfmax.vf} } } */ +/* { dg-final { scan-assembler {vfwmul.vf} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c index 3fa31504cfef..3058367785e9 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c @@ -19,4 +19,5 @@ /* { dg-final { scan-assembler-not 
{vfrdiv.vf} } } */ /* { dg-final { scan-assembler-not {vfmin.vf} } } */ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmul.vf} } } */ /* { dg-final { scan-assembler {fcvt.s.h} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c index 3d526b56e01a..d687f8f6f624 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c @@ -19,4 +19,5 @@ /* { dg-final { scan-assembler-not {vfrdiv.vf} } } */ /* { dg-final { scan-assembler-not {vfmin.vf} } } */ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ +/* { dg-final { scan-assembler-not {vfwmul.vf} } } */ /* { dg-final { scan-assembler {fcvt.d.s} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h index da02065dda82..c9253e9867a4 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h @@ -31,6 +31,21 @@ #define RUN_VF_BINOP_REVERSE_CASE_0_WRAP(T, NAME, out, in, f, n) \ RUN_VF_BINOP_REVERSE_CASE_0 (T, NAME, out, in, f, n) +#define DEF_VF_BINOP_WIDEN_CASE_0(T1, T2, OP, NAME) \ + void test_vf_binop_widen_##NAME##_##T1##_case_0 (T2 *restrict out, \ + T1 *restrict in, T1 f, \ + unsigned n) \ + { \ + for (unsigned i = 0; i < n; i++) \ + out[i] = (T2) f * (T2) in[i]; \ + } +#define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME) \ + DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME) +#define RUN_VF_BINOP_WIDEN_CASE_0(T1, T2, NAME, out, in, f, n) \ + test_vf_binop_widen_##NAME##_##T1##_case_0 (out, in, f, n) +#define RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) \ + RUN_VF_BINOP_WIDEN_CASE_0 (T1, T2, NAME, out, in, f, n) + #define VF_BINOP_BODY(op) \ out[k + 0] = in[k + 0] op tmp; \ out[k + 1] = in[k + 1] op tmp; \ @@ -122,8 +137,6 @@ 
#define DEF_VF_BINOP_REVERSE_CASE_1_WRAP(T, OP, NAME, BODY) \ DEF_VF_BINOP_REVERSE_CASE_1 (T, OP, NAME, BODY) -#endif - #define DEF_MIN_0(T) \ static inline T test_##T##_min_0 (T a, T b) { return a > b ? b : a; } @@ -224,3 +237,20 @@ DEF_MAX_1 (double) #define VF_BINOP_FUNC_BODY_X128(op) \ VF_BINOP_FUNC_BODY_X64 (op) \ VF_BINOP_FUNC_BODY_X64 (op) + +#define DEF_VF_BINOP_WIDEN_CASE_1(TYPE1, TYPE2, OP, NAME) \ + void test_vf_binop_widen_##NAME##_##TYPE1##_##TYPE2##_case_1 ( \ + TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3, \ + TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE1 *__restrict b, \ + TYPE1 *__restrict a2, TYPE1 *__restrict b2, int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + dst[i] = (TYPE2) * a OP (TYPE2) b[i]; \ + dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i]; \ + dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i]; \ + dst4[i] = (TYPE2) * a OP (TYPE2) b2[i]; \ + } \ + } + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h new file mode 100644 index 000000000000..929b2dec85da --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h @@ -0,0 +1,32 @@ +#ifndef HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H +#define HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H + +#include <assert.h> + +#define N 512 + +int main () +{ + T1 f; + T1 in[N]; + T2 out[N]; + T2 out2[N]; + + f = LIMIT % 8723; + for (int i = 0; i < N; i++) + { + in[i] = LIMIT + i & 1964; + out[i] = LIMIT + i & 628; + out2[i] = LIMIT + i & 628; + asm volatile ("" ::: "memory"); + } + + TEST_RUN (T1, T2, NAME, out, in, f, N); + + for (int i = 0; i < N; i++) + assert (out[i] == ((T2) f OP (T2) in[i])); + + return 0; +} + +#endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c new file mode 100644 index 000000000000..1ba84e59b017 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c @@ -0,0 +1,20 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target riscv_zvfh_ok } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvfh" } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_binop.h" + +#define T1 _Float16 +#define T2 float +#define NAME mul +#define OP * + +DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -32768 + +#include "vf_binop_widen_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c new file mode 100644 index 000000000000..52fbeb377eb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_binop.h" + +#define T1 float +#define T2 double +#define NAME mul +#define OP * + +DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -2147483648 + +#include "vf_binop_widen_run.h"