From: Yunze Zhu <yunze...@linux.alibaba.com> This commit adds support for xtheadvector-specific fault-only-first segment load/store intrinsics with b/h/w suffixes. We also define an enum to be used in thead-vector-builtins-bases.cc. https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
gcc/ChangeLog: * config/riscv/riscv-vector-builtins-shapes.cc (struct th_seg_loadstore_def): Define new builtin shapes. * config/riscv/thead-vector-builtins-bases.cc (BASE): New base_name. * config/riscv/thead-vector-builtins-bases.h: New function_base. * config/riscv/thead-vector-builtins-functions.def (th_vlsegff): New intrinsics def. (th_vlseguff): Ditto. * config/riscv/thead-vector.md (UNSPEC_TH_VLSEGBFF): New. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c: New test. * gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c: New test. --- .../riscv/riscv-vector-builtins-shapes.cc | 10 +- .../riscv/thead-vector-builtins-bases.cc | 48 +++++-- .../riscv/thead-vector-builtins-bases.h | 2 + .../riscv/thead-vector-builtins-functions.def | 2 + gcc/config/riscv/thead-vector.md | 13 ++ .../riscv/rvv/xtheadvector/vlsegff-vsseg.c | 118 ++++++++++++++++++ .../riscv/rvv/xtheadvector/vlseguff-vsseg.c | 115 +++++++++++++++++ 7 files changed, 293 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index dab5e1a4e23..2338ffcd484 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -1437,9 +1437,13 @@ struct th_seg_loadstore_def : public build_base { gcc_unreachable (); } - if (strstr (instance.base_name, "l") - && TYPE_UNSIGNED ( (builtin_types[instance.type.index].scalar))) - b.append_name ("u"); + if (strstr (instance.base_name, "l")) + { + if (TYPE_UNSIGNED ( (builtin_types[instance.type.index].scalar))) + b.append_name ("u"); + if (strstr (instance.base_name, "ff")) + b.append_name ("ff"); + } if (!overloaded_p) { diff --git a/gcc/config/riscv/thead-vector-builtins-bases.cc b/gcc/config/riscv/thead-vector-builtins-bases.cc index 
5331e8138c1..d3d5f33861f 100644 --- a/gcc/config/riscv/thead-vector-builtins-bases.cc +++ b/gcc/config/riscv/thead-vector-builtins-bases.cc @@ -65,19 +65,39 @@ public: gcc_assert (TARGET_XTHEADVECTOR); unsigned sew = GET_MODE_BITSIZE (GET_MODE_INNER (e.vector_mode ())); int UNSPEC; - switch (sew) + if (!IS_FAULT_ONLY_FIRST) { - case 8: - UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGB : UNSPEC_TH_VLSEGBU; - break; - case 16: - UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGH : UNSPEC_TH_VLSEGHU; - break; - case 32: - UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGW : UNSPEC_TH_VLSEGWU; - break; - default: - gcc_unreachable (); + switch (sew) + { + case 8: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGB : UNSPEC_TH_VLSEGBU; + break; + case 16: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGH : UNSPEC_TH_VLSEGHU; + break; + case 32: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGW : UNSPEC_TH_VLSEGWU; + break; + default: + gcc_unreachable (); + } + } + else + { + switch (sew) + { + case 8: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGBFF : UNSPEC_TH_VLSEGBUFF; + break; + case 16: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGHFF : UNSPEC_TH_VLSEGHUFF; + break; + case 32: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGWFF : UNSPEC_TH_VLSEGWUFF; + break; + default: + gcc_unreachable (); + } } return e.use_exact_insn ( code_for_pred_th_unit_seg_load (UNSPEC, e.vector_mode ())); @@ -275,6 +295,8 @@ static CONSTEXPR const th_vssseg th_vssseg_obj; static CONSTEXPR const th_vlxseg<true> th_vlxseg_obj; static CONSTEXPR const th_vlxseg<false> th_vlxsegu_obj; static CONSTEXPR const th_vsxseg th_vsxseg_obj; +static CONSTEXPR const th_vlseg<true, true> th_vlsegff_obj; +static CONSTEXPR const th_vlseg<false, true> th_vlseguff_obj; /* Declare the function base NAME, pointing it to an instance of class <NAME>_obj. 
*/ @@ -291,4 +313,6 @@ BASE (th_vssseg) BASE (th_vlxseg) BASE (th_vlxsegu) BASE (th_vsxseg) +BASE (th_vlsegff) +BASE (th_vlseguff) } // end namespace riscv_vector diff --git a/gcc/config/riscv/thead-vector-builtins-bases.h b/gcc/config/riscv/thead-vector-builtins-bases.h index 35b4ccb379c..3e520d0b22b 100644 --- a/gcc/config/riscv/thead-vector-builtins-bases.h +++ b/gcc/config/riscv/thead-vector-builtins-bases.h @@ -35,6 +35,8 @@ extern const function_base *const th_vssseg; extern const function_base *const th_vlxseg; extern const function_base *const th_vlxsegu; extern const function_base *const th_vsxseg; +extern const function_base *const th_vlsegff; +extern const function_base *const th_vlseguff; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def index 5cd6f279a32..9dfc2be7ac8 100644 --- a/gcc/config/riscv/thead-vector-builtins-functions.def +++ b/gcc/config/riscv/thead-vector-builtins-functions.def @@ -36,6 +36,8 @@ DEF_RVV_FUNCTION (vsuxw, th_indexed_loadstore_width, none_m_preds, all_v_scalar_ DEF_RVV_FUNCTION (vext_x_v, th_extract, none_preds, iu_x_s_u_ops) DEF_RVV_FUNCTION (th_vlseg, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) DEF_RVV_FUNCTION (th_vlsegu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (th_vlsegff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (th_vlseguff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) DEF_RVV_FUNCTION (th_vsseg, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ops) DEF_RVV_FUNCTION (th_vlsseg, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops) DEF_RVV_FUNCTION (th_vlssegu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops) diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md index c9cedcfc08d..5409dce16ff 100644 
--- a/gcc/config/riscv/thead-vector.md +++ b/gcc/config/riscv/thead-vector.md @@ -46,6 +46,13 @@ (define_c_enum "unspec" [ UNSPEC_TH_VLXSEGHU UNSPEC_TH_VLXSEGW UNSPEC_TH_VLXSEGWU + + UNSPEC_TH_VLSEGBFF + UNSPEC_TH_VLSEGBUFF + UNSPEC_TH_VLSEGHFF + UNSPEC_TH_VLSEGHUFF + UNSPEC_TH_VLSEGWFF + UNSPEC_TH_VLSEGWUFF ]) (define_int_iterator UNSPEC_TH_VLMEM_OP [ @@ -70,6 +77,9 @@ (define_int_iterator UNSPEC_TH_VLSEGMEM_OP[ UNSPEC_TH_VLSEGB UNSPEC_TH_VLSEGBU UNSPEC_TH_VLSEGH UNSPEC_TH_VLSEGHU UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU + UNSPEC_TH_VLSEGBFF UNSPEC_TH_VLSEGBUFF + UNSPEC_TH_VLSEGHFF UNSPEC_TH_VLSEGHUFF + UNSPEC_TH_VLSEGWFF UNSPEC_TH_VLSEGWUFF ]) (define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[ @@ -106,6 +116,9 @@ (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLXSEGB "b") (UNSPEC_TH_VLXSEGBU "bu") (UNSPEC_TH_VLXSEGH "h") (UNSPEC_TH_VLXSEGHU "hu") (UNSPEC_TH_VLXSEGW "w") (UNSPEC_TH_VLXSEGWU "wu") + (UNSPEC_TH_VLSEGBFF "bff") (UNSPEC_TH_VLSEGBUFF "buff") + (UNSPEC_TH_VLSEGHFF "hff") (UNSPEC_TH_VLSEGHUFF "huff") + (UNSPEC_TH_VLSEGWFF "wff") (UNSPEC_TH_VLSEGWUFF "wuff") ]) (define_int_attr vlmem_order_attr [ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c new file mode 100644 index 00000000000..359f6766e6a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c @@ -0,0 +1,118 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** 
th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f1 (void * in, void *out) +{ + vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tu (v, in, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} + +/* +** f2: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f2 (void * in, void *out, vbool16_t mask) +{ + vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_m (mask, in, 4); + vint16m1_t v_0 = __riscv_vget_i16m1(v, 0); + vint16m1_t v_1 = __riscv_vget_i16m1(v, 1); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v_0, v2_0, 4); + vint16m1_t v3_1 = 
__riscv_vadd_vv_i16m1 (v_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} + +/* +** f3: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f3 (void * in, void *out, vbool16_t mask) +{ + vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tumu (mask, v, in, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c new file mode 100644 index 00000000000..95580c2a50b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c @@ -0,0 +1,115 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f1(void *in, void *out) { + vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tu(v, in, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} + +/* +** f2: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** 
th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f2(void *in, void *out, vbool16_t mask) { + vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_m(mask, in, 4); + vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0); + vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} + +/* +** f3: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f3(void 
*in, void *out, vbool16_t mask) { + vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tumu(mask, v, in, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu(mask, v3_0, v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu(mask, v3_1, v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu(mask, v4_0, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu(mask, v4_1, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} -- 2.47.1