LGTM :)

Paul-Antoine Arras <par...@baylibre.com> 於 2025年9月9日 週二 22:48 寫道:
> This pattern enables the combine pass (or late-combine, depending on the
> case)
> to merge a float_extend'ed vec_duplicate into a minus RTL instruction. Both
> minus operands are widened.
>
> Before this patch, we have six instructions, e.g.:
> fcvt.d.s fa0,fa0
> vsetvli a5,zero,e64,m1,ta,ma
> vfmv.v.f v3,fa0
> vfwcvt.f.f.v v1,v2
> vsetvli zero,zero,e64,m1,ta,ma
> vfsub.vv v1,v1,v3
>
> After, we get only one:
> vfwsub.vf v1,v2,fa0
>
> gcc/ChangeLog:
>
> * config/riscv/autovec-opt.md (*vfwsub_vf_<mode>): New pattern to
> combine float_extend + vec_duplicate + vfwsub.vv into vfwsub.vf.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwsub.vf.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
> (DEF_VF_BINOP_WIDEN_CASE_0, DEF_VF_BINOP_WIDEN_CASE_1): Swap
> operands.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h:
> Likewise.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c: New
> test.
> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c: New
> test.
> --- > gcc/config/riscv/autovec-opt.md | 23 +++++++++++++++++++ > .../riscv/rvv/autovec/vx_vf/vf-1-f16.c | 2 ++ > .../riscv/rvv/autovec/vx_vf/vf-1-f32.c | 2 ++ > .../riscv/rvv/autovec/vx_vf/vf-2-f16.c | 3 ++- > .../riscv/rvv/autovec/vx_vf/vf-2-f32.c | 3 ++- > .../riscv/rvv/autovec/vx_vf/vf-3-f16.c | 2 ++ > .../riscv/rvv/autovec/vx_vf/vf-3-f32.c | 2 ++ > .../riscv/rvv/autovec/vx_vf/vf-4-f16.c | 1 + > .../riscv/rvv/autovec/vx_vf/vf-4-f32.c | 1 + > .../riscv/rvv/autovec/vx_vf/vf_binop.h | 10 ++++---- > .../rvv/autovec/vx_vf/vf_binop_widen_run.h | 2 +- > .../rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c | 20 ++++++++++++++++ > .../rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c | 16 +++++++++++++ > 13 files changed, 79 insertions(+), 8 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c > > diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md > index 5512c46fa8e..02f19bc6a42 100644 > --- gcc/config/riscv/autovec-opt.md > +++ gcc/config/riscv/autovec-opt.md > @@ -2215,6 +2215,29 @@ (define_insn_and_split "*vfwadd_wf_<mode>" > [(set_attr "type" "vfwalu")] > ) > > +;; vfwsub.vf > +(define_insn_and_split "*vfwsub_vf_<mode>" > + [(set (match_operand:VWEXTF 0 "register_operand") > + (minus:VWEXTF > + (float_extend:VWEXTF > + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) > + (vec_duplicate:VWEXTF > + (float_extend:<VEL> > + (match_operand:<VSUBEL> 2 "register_operand")))))] > + "TARGET_VECTOR && can_create_pseudo_p ()" > + "#" > + "&& 1" > + [(const_int 0)] > + { > + riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MINUS, > + > <MODE>mode), > + riscv_vector::BINARY_OP_FRM_DYN, > operands); > + > + DONE; > + } > + [(set_attr "type" "vfwalu")] > +) > + > ;; vfadd.vf > (define_insn_and_split "*vfadd_vf_<mode>" > [(set (match_operand:V_VLSF 0 "register_operand") > diff --git 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c > index fed5d3b6001..20e809010d8 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c > @@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP > (_Float16), max) > DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max) > DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul) > DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add) > +DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, -, sub) > DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add) > > /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ > @@ -50,4 +51,5 @@ DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add) > /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */ > /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */ > /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */ > +/* { dg-final { scan-assembler-times {vfwsub.vf} 1 } } */ > /* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c > index 82d64d11c87..8ecd7d0fa00 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c > @@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP > (float), max) > DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max) > DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul) > DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add) > +DEF_VF_BINOP_WIDEN_CASE_0 (float, double, -, sub) > DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add) > > /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ > @@ -50,4 +51,5 @@ DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add) > /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */ > /* { dg-final { 
scan-assembler-times {vfwmul.vf} 1 } } */ > /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */ > +/* { dg-final { scan-assembler-times {vfwsub.vf} 1 } } */ > /* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c > index eef86749c50..8fe361f4f70 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c > @@ -24,5 +24,6 @@ > /* { dg-final { scan-assembler-not {vfmax.vf} } } */ > /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ > +/* { dg-final { scan-assembler-not {vfwsub.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.wf} } } */ > -/* { dg-final { scan-assembler-times {fcvt.s.h} 7 } } */ > +/* { dg-final { scan-assembler-times {fcvt.s.h} 8 } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c > index c5e93060e02..a1eaaa8b47f 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c > @@ -24,5 +24,6 @@ > /* { dg-final { scan-assembler-not {vfmax.vf} } } */ > /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ > +/* { dg-final { scan-assembler-not {vfwsub.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.wf} } } */ > -/* { dg-final { scan-assembler-times {fcvt.d.s} 7 } } */ > +/* { dg-final { scan-assembler-times {fcvt.d.s} 8 } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c > index f0c6594533e..f799437d3ca 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c > @@ -31,6 
+31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP > (_Float16), max, > VF_BINOP_FUNC_BODY_X128) > DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul) > DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add) > +DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, -, sub) > DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add) > > /* { dg-final { scan-assembler {vfmadd.vf} } } */ > @@ -54,4 +55,5 @@ DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add) > /* { dg-final { scan-assembler {vfmax.vf} } } */ > /* { dg-final { scan-assembler {vfwmul.vf} } } */ > /* { dg-final { scan-assembler {vfwadd.vf} } } */ > +/* { dg-final { scan-assembler {vfwsub.vf} } } */ > /* { dg-final { scan-assembler {vfwadd.wf} } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c > index 60617c3ec9b..bb987e1edc0 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c > @@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP > (float), max, > VF_BINOP_FUNC_BODY_X128) > DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul) > DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add) > +DEF_VF_BINOP_WIDEN_CASE_1 (float, double, -, sub) > DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add) > > /* { dg-final { scan-assembler {vfmadd.vf} } } */ > @@ -54,4 +55,5 @@ DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add) > /* { dg-final { scan-assembler {vfmax.vf} } } */ > /* { dg-final { scan-assembler {vfwmul.vf} } } */ > /* { dg-final { scan-assembler {vfwadd.vf} } } */ > +/* { dg-final { scan-assembler {vfwsub.vf} } } */ > /* { dg-final { scan-assembler {vfwadd.wf} } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c > index 0650265b6c0..50a4968718b 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c > +++ 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c > @@ -24,5 +24,6 @@ > /* { dg-final { scan-assembler-not {vfmax.vf} } } */ > /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ > +/* { dg-final { scan-assembler-not {vfwsub.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.wf} } } */ > /* { dg-final { scan-assembler {fcvt.s.h} } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c > index b43699deb83..2e7ef538215 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c > @@ -24,5 +24,6 @@ > /* { dg-final { scan-assembler-not {vfmax.vf} } } */ > /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ > +/* { dg-final { scan-assembler-not {vfwsub.vf} } } */ > /* { dg-final { scan-assembler-not {vfwadd.wf} } } */ > /* { dg-final { scan-assembler {fcvt.d.s} } } */ > diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h > index 2a55c9c6df9..479a6fa7222 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h > @@ -37,7 +37,7 @@ > unsigned n) > \ > { > \ > for (unsigned i = 0; i < n; i++) > \ > - out[i] = (T2) f OP (T2) in[i]; > \ > + out[i] = (T2) in[i] OP (T2) f; > \ > } > #define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME) > \ > DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME) > @@ -246,10 +246,10 @@ DEF_MAX_1 (double) > { > \ > for (int i = 0; i < n; i++) > \ > { > \ > - dst[i] = (TYPE2) * a OP (TYPE2) b[i]; > \ > - dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i]; > \ > - dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i]; > \ > - dst4[i] = (TYPE2) * a OP (TYPE2) b2[i]; > \ > + dst[i] = (TYPE2) b[i] OP (TYPE2) * a; > \ > + dst2[i] = 
(TYPE2) b[i] OP (TYPE2) * a2; > \ > + dst3[i] = (TYPE2) a[i] OP (TYPE2) * a2; > \ > + dst4[i] = (TYPE2) b2[i] OP (TYPE2) * a; > \ > } > \ > } > > diff --git > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h > index 8748cda21aa..1c9dc8c5e7b 100644 > --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h > @@ -28,7 +28,7 @@ int main () > TEST_RUN (T1, T2, NAME, out, in, f, N); > > for (int i = 0; i < N; i++) > - assert (out[i] == ((T2) f OP (T2) in[i])); > + assert (out[i] == ((T2) in[i] OP (T2) f)); > > return 0; > } > diff --git > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c > new file mode 100644 > index 00000000000..6269073b940 > --- /dev/null > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c > @@ -0,0 +1,20 @@ > +/* { dg-do run { target { riscv_v } } } */ > +/* { dg-require-effective-target riscv_v_ok } */ > +/* { dg-require-effective-target riscv_zvfh_ok } */ > +/* { dg-add-options "riscv_v" } */ > +/* { dg-add-options "riscv_zvfh" } */ > +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ > + > +#include "vf_binop.h" > + > +#define T1 _Float16 > +#define T2 float > +#define NAME sub > +#define OP - > + > +DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME) > + > +#define TEST_RUN(T1, T2, NAME, out, in, f, n) > RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) > +#define LIMIT -32768 > + > +#include "vf_binop_widen_run.h" > diff --git > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c > gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c > new file mode 100644 > index 00000000000..9e23db1f8af > --- /dev/null > +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c > @@ -0,0 +1,16 
@@ > +/* { dg-do run { target { riscv_v } } } */ > +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ > + > +#include "vf_binop.h" > + > +#define T1 float > +#define T2 double > +#define NAME sub > +#define OP - > + > +DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME) > + > +#define TEST_RUN(T1, T2, NAME, out, in, f, n) > RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) > +#define LIMIT -2147483648 > + > +#include "vf_binop_widen_run.h" > -- > 2.39.5 > >