From: Yunze Zhu <yunze...@linux.alibaba.com>

This commit adds support for the xtheadvector-specific fault-only-first
segment load/store intrinsics with the b/h/w suffixes. We also define the
unspec enums used in thead-vector.md.

https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
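For reference, a minimal usage sketch of the new intrinsics (the intrinsic
names are taken verbatim from the tests below; the typed pointer and the
size_t vl parameter are my assumption, following the existing th segment
load/store intrinsics, whereas the tests simply pass void * and a literal):

  #include <stdint.h>
  #include <stddef.h>
  #include "riscv_th_vector.h"

  /* Fault-only-first load of a 2-field segment of 16-bit elements:
     unlike plain th_vlseg2h, vl may be truncated at the first faulting
     element, so a trap on a later element is suppressed.  */
  void copy2 (const int16_t *in, int16_t *out, size_t vl)
  {
    vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, vl);
    __riscv_th_vsseg2h_v_i16m1x2 (out, v, vl);
  }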
V2: Changed to reuse the existing thead function base th_loadstore_width.

gcc/ChangeLog:

	* config/riscv/riscv-vector-builtins-bases.cc (BASE): New base_name.
	* config/riscv/riscv-vector-builtins-bases.h: New function_base.
	* config/riscv/riscv-vector-builtins-shapes.cc (build): Define new
	builtin shapes.
	* config/riscv/thead-vector-builtins-functions.def (vlsegbff): New
	intrinsics def.
	(vlseghff): Ditto.
	(vlsegwff): Ditto.
	(vlsegbuff): Ditto.
	(vlseghuff): Ditto.
	(vlsegwuff): Ditto.
	* config/riscv/thead-vector.md (UNSPEC_TH_VLSEGBFF): New.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c: New test.
	* gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc      |  12 ++
 .../riscv/riscv-vector-builtins-bases.h       |   6 +
 .../riscv/riscv-vector-builtins-shapes.cc     |  10 +-
 .../riscv/thead-vector-builtins-functions.def |   6 +
 gcc/config/riscv/thead-vector.md              |  13 ++
 .../riscv/rvv/xtheadvector/vlsegff-vsseg.c    | 118 ++++++++++++++++++
 .../riscv/rvv/xtheadvector/vlseguff-vsseg.c   | 115 +++++++++++++++++
 7 files changed, 277 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 380c695ccb1..16dc8c5e752 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -2818,6 +2818,12 @@ static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VL
 static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGB> vsxsegb_obj;
 static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGH> vsxsegh_obj;
 static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGW> vsxsegw_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGBFF> vlsegbff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGHFF> vlseghff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGWFF> vlsegwff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGBUFF> vlsegbuff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGHUFF> vlseghuff_obj;
+static CONSTEXPR const th_loadstore_width<false, LST_UNIT_STRIDE, true, UNSPEC_TH_VLSEGWUFF> vlsegwuff_obj;

 /* Crypto Vector */
 static CONSTEXPR const vandn vandn_obj;
@@ -3175,6 +3181,12 @@ BASE (vlxsegwu)
 BASE (vsxsegb)
 BASE (vsxsegh)
 BASE (vsxsegw)
+BASE (vlsegbff)
+BASE (vlseghff)
+BASE (vlsegwff)
+BASE (vlsegbuff)
+BASE (vlseghuff)
+BASE (vlsegwuff)
 /* Crypto vector */
 BASE (vandn)
 BASE (vbrev)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 9a8d378019e..eedfff8a922 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -344,6 +344,12 @@ extern const function_base *const vlxsegwu;
 extern const function_base *const vsxsegb;
 extern const function_base *const vsxsegh;
 extern const function_base *const vsxsegw;
+extern const function_base *const vlsegbff;
+extern const function_base *const vlseghff;
+extern const function_base *const vlsegwff;
+extern const function_base *const vlsegbuff;
+extern const function_base *const vlseghuff;
+extern const function_base *const vlsegwuff;
 /* Below function_base are Vectro Crypto*/
 extern const function_base *const vandn;
 extern const function_base *const vbrev;
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index 10c096b2e44..e27fda6d9c9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -1448,9 +1448,13 @@ void build (function_builder &b,
       else
	gcc_unreachable ();

-      if (strstr (instance.base_name, "l")
-	  && TYPE_UNSIGNED (builtin_types[instance.type.index].scalar))
-	b.append_name ("u");
+      if (strstr (instance.base_name, "l"))
+	{
+	  if (TYPE_UNSIGNED (builtin_types[instance.type.index].scalar))
+	    b.append_name ("u");
+	  if (strstr (instance.base_name, "ff"))
+	    b.append_name ("ff");
+	}

       if (!overloaded_p)
	{
diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def
index 2d28b4eb33e..ba8172fa337 100644
--- a/gcc/config/riscv/thead-vector-builtins-functions.def
+++ b/gcc/config/riscv/thead-vector-builtins-functions.def
@@ -61,6 +61,12 @@ DEF_RVV_FUNCTION (vlxsegwu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar
 DEF_RVV_FUNCTION (vsxsegb, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops)
 DEF_RVV_FUNCTION (vsxsegh, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops)
 DEF_RVV_FUNCTION (vsxsegw, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops)
+DEF_RVV_FUNCTION (vlsegbff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlseghff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlsegwff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlsegbuff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlseghuff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops)
+DEF_RVV_FUNCTION (vlsegwuff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops)
 #undef REQUIRED_EXTENSIONS
 #undef DEF_RVV_FUNCTION
diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md
index c9cedcfc08d..5409dce16ff 100644
--- a/gcc/config/riscv/thead-vector.md
+++ b/gcc/config/riscv/thead-vector.md
@@ -46,6 +46,13 @@ (define_c_enum "unspec" [
   UNSPEC_TH_VLXSEGHU
   UNSPEC_TH_VLXSEGW
   UNSPEC_TH_VLXSEGWU
+
+  UNSPEC_TH_VLSEGBFF
+  UNSPEC_TH_VLSEGBUFF
+  UNSPEC_TH_VLSEGHFF
+  UNSPEC_TH_VLSEGHUFF
+  UNSPEC_TH_VLSEGWFF
+  UNSPEC_TH_VLSEGWUFF
 ])

 (define_int_iterator UNSPEC_TH_VLMEM_OP [
@@ -70,6 +77,9 @@ (define_int_iterator UNSPEC_TH_VLSEGMEM_OP[
   UNSPEC_TH_VLSEGB UNSPEC_TH_VLSEGBU
   UNSPEC_TH_VLSEGH UNSPEC_TH_VLSEGHU
   UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU
+  UNSPEC_TH_VLSEGBFF UNSPEC_TH_VLSEGBUFF
+  UNSPEC_TH_VLSEGHFF UNSPEC_TH_VLSEGHUFF
+  UNSPEC_TH_VLSEGWFF UNSPEC_TH_VLSEGWUFF
 ])

 (define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[
@@ -106,6 +116,9 @@ (define_int_attr vlmem_op_attr [
   (UNSPEC_TH_VLXSEGB "b") (UNSPEC_TH_VLXSEGBU "bu")
   (UNSPEC_TH_VLXSEGH "h") (UNSPEC_TH_VLXSEGHU "hu")
   (UNSPEC_TH_VLXSEGW "w") (UNSPEC_TH_VLXSEGWU "wu")
+  (UNSPEC_TH_VLSEGBFF "bff") (UNSPEC_TH_VLSEGBUFF "buff")
+  (UNSPEC_TH_VLSEGHFF "hff") (UNSPEC_TH_VLSEGHUFF "huff")
+  (UNSPEC_TH_VLSEGWFF "wff") (UNSPEC_TH_VLSEGWUFF "wuff")
 ])

 (define_int_attr vlmem_order_attr [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c
new file mode 100644
index 00000000000..359f6766e6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c
@@ -0,0 +1,118 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_th_vector.h"
+
+/*
+** f1:
+**	li\s+[a-x0-9]+,4
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	ret
+*/
+void f1 (void * in, void *out)
+{
+  vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
+  vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tu (v, in, 4);
+  vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+  vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+  vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4);
+  vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4);
+  vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4);
+  vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4);
+  vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+  v4 = __riscv_vset (v4, 1, v4_1);
+  __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4);
+}
+
+/*
+** f2:
+**	li\s+[a-x0-9]+,4
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+**	th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	ret
+*/
+void f2 (void * in, void *out, vbool16_t mask)
+{
+  vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
+  vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_m (mask, in, 4);
+  vint16m1_t v_0 = __riscv_vget_i16m1(v, 0);
+  vint16m1_t v_1 = __riscv_vget_i16m1(v, 1);
+  vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+  vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+  vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v_0, v2_0, 4);
+  vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v_1, v2_1, 4);
+  vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4);
+  vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4);
+  vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+  v4 = __riscv_vset (v4, 1, v4_1);
+  __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4);
+}
+
+/*
+** f3:
+**	li\s+[a-x0-9]+,4
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+**	th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	ret
+*/
+void f3 (void * in, void *out, vbool16_t mask)
+{
+  vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4);
+  vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tumu (mask, v, in, 4);
+  vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0);
+  vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1);
+  vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4);
+  vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, v2_1, v2_1, 4);
+  vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4);
+  vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4);
+  vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0);
+  v4 = __riscv_vset (v4, 1, v4_1);
+  __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c
new file mode 100644
index 00000000000..95580c2a50b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c
@@ -0,0 +1,115 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+#include "riscv_th_vector.h"
+
+/*
+** f1:
+**	li\s+[a-x0-9]+,4
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	ret
+*/
+void f1(void *in, void *out) {
+  vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4);
+  vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tu(v, in, 4);
+  vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0);
+  vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1);
+  vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v2_0, v2_0, 4);
+  vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v2_1, v2_1, 4);
+  vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4);
+  vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4);
+  vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0);
+  v4 = __riscv_vset(v4, 1, v4_1);
+  __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4);
+}
+
+/*
+** f2:
+**	li\s+[a-x0-9]+,4
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+**	th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	ret
+*/
+void f2(void *in, void *out, vbool16_t mask) {
+  vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4);
+  vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_m(mask, in, 4);
+  vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0);
+  vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1);
+  vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0);
+  vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1);
+  vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v_0, v2_0, 4);
+  vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v_1, v2_1, 4);
+  vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4);
+  vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4);
+  vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0);
+  v4 = __riscv_vset(v4, 1, v4_1);
+  __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4);
+}
+
+/*
+** f3:
+**	li\s+[a-x0-9]+,4
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t
+**	th\.vsetvli\s+[a-x0-9]+,zero,e16,m1
+**	th\.vmv\.v\.i\s+v[0-9]+,0
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t
+**	th\.vsetvli\s+zero,zero,e16,m1
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+
+**	th\.vsetvli\s+zero,[a-x0-9]+,e16,m1
+**	th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\)
+**	ret
+*/
+void f3(void *in, void *out, vbool16_t mask) {
+  vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4);
+  vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tumu(mask, v, in, 4);
+  vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0);
+  vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1);
+  vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu(mask, v3_0, v2_0, v2_0, 4);
+  vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu(mask, v3_1, v2_1, v2_1, 4);
+  vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu(mask, v4_0, v3_0, v2_0, 4);
+  vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu(mask, v4_1, v3_1, v2_1, 4);
+  vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0);
+  v4 = __riscv_vset(v4, 1, v4_1);
+  __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4);
+}
--
2.47.1