From: Yunze Zhu <yunze...@linux.alibaba.com> This commit adds support for xtheadvector-specific fault-only-first segment load/store intrinsics with b/h/w suffixes. We also define an enum to be used in thead-vector-builtins-bases.cc. https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
gcc/ChangeLog: * config/riscv/riscv-vector-builtins-shapes.cc (struct th_seg_loadstore_def): Define new builtin shapes. * config/riscv/thead-vector-builtins-bases.cc (BASE): New base_name. * config/riscv/thead-vector-builtins-bases.h: New function_base. * config/riscv/thead-vector-builtins-functions.def (th_vlsegff): New intrinsics def. (th_vlseguff): Ditto. * config/riscv/thead-vector.md (UNSPEC_TH_VLSEGBFF): New. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c: New test. * gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c: New test. --- .../riscv/riscv-vector-builtins-shapes.cc | 10 +- .../riscv/thead-vector-builtins-bases.cc | 48 +++++-- .../riscv/thead-vector-builtins-bases.h | 2 + .../riscv/thead-vector-builtins-functions.def | 2 + gcc/config/riscv/thead-vector.md | 13 ++ .../riscv/rvv/xtheadvector/vlsegff-vsseg.c | 118 ++++++++++++++++++ .../riscv/rvv/xtheadvector/vlseguff-vsseg.c | 115 +++++++++++++++++ 7 files changed, 293 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index dab5e1a4e23..2338ffcd484 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -1437,9 +1437,13 @@ struct th_seg_loadstore_def : public build_base { gcc_unreachable (); } - if (strstr (instance.base_name, "l") - && TYPE_UNSIGNED ( (builtin_types[instance.type.index].scalar))) - b.append_name ("u"); + if (strstr (instance.base_name, "l")) + { + if (TYPE_UNSIGNED ( (builtin_types[instance.type.index].scalar))) + b.append_name ("u"); + if (strstr (instance.base_name, "ff")) + b.append_name ("ff"); + } if (!overloaded_p) { diff --git a/gcc/config/riscv/thead-vector-builtins-bases.cc b/gcc/config/riscv/thead-vector-builtins-bases.cc index 
5331e8138c1..d3d5f33861f 100644 --- a/gcc/config/riscv/thead-vector-builtins-bases.cc +++ b/gcc/config/riscv/thead-vector-builtins-bases.cc @@ -65,19 +65,39 @@ public: gcc_assert (TARGET_XTHEADVECTOR); unsigned sew = GET_MODE_BITSIZE (GET_MODE_INNER (e.vector_mode ())); int UNSPEC; - switch (sew) + if (!IS_FAULT_ONLY_FIRST) { - case 8: - UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGB : UNSPEC_TH_VLSEGBU; - break; - case 16: - UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGH : UNSPEC_TH_VLSEGHU; - break; - case 32: - UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGW : UNSPEC_TH_VLSEGWU; - break; - default: - gcc_unreachable (); + switch (sew) + { + case 8: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGB : UNSPEC_TH_VLSEGBU; + break; + case 16: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGH : UNSPEC_TH_VLSEGHU; + break; + case 32: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGW : UNSPEC_TH_VLSEGWU; + break; + default: + gcc_unreachable (); + } + } + else + { + switch (sew) + { + case 8: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGBFF : UNSPEC_TH_VLSEGBUFF; + break; + case 16: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGHFF : UNSPEC_TH_VLSEGHUFF; + break; + case 32: + UNSPEC = IS_SIGNED ? UNSPEC_TH_VLSEGWFF : UNSPEC_TH_VLSEGWUFF; + break; + default: + gcc_unreachable (); + } } return e.use_exact_insn ( code_for_pred_th_unit_seg_load (UNSPEC, e.vector_mode ())); @@ -275,6 +295,8 @@ static CONSTEXPR const th_vssseg th_vssseg_obj; static CONSTEXPR const th_vlxseg<true> th_vlxseg_obj; static CONSTEXPR const th_vlxseg<false> th_vlxsegu_obj; static CONSTEXPR const th_vsxseg th_vsxseg_obj; +static CONSTEXPR const th_vlseg<true, true> th_vlsegff_obj; +static CONSTEXPR const th_vlseg<false, true> th_vlseguff_obj; /* Declare the function base NAME, pointing it to an instance of class <NAME>_obj. 
*/ @@ -291,4 +313,6 @@ BASE (th_vssseg) BASE (th_vlxseg) BASE (th_vlxsegu) BASE (th_vsxseg) +BASE (th_vlsegff) +BASE (th_vlseguff) } // end namespace riscv_vector diff --git a/gcc/config/riscv/thead-vector-builtins-bases.h b/gcc/config/riscv/thead-vector-builtins-bases.h index 35b4ccb379c..3e520d0b22b 100644 --- a/gcc/config/riscv/thead-vector-builtins-bases.h +++ b/gcc/config/riscv/thead-vector-builtins-bases.h @@ -35,6 +35,8 @@ extern const function_base *const th_vssseg; extern const function_base *const th_vlxseg; extern const function_base *const th_vlxsegu; extern const function_base *const th_vsxseg; +extern const function_base *const th_vlsegff; +extern const function_base *const th_vlseguff; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def index 5cd6f279a32..9dfc2be7ac8 100644 --- a/gcc/config/riscv/thead-vector-builtins-functions.def +++ b/gcc/config/riscv/thead-vector-builtins-functions.def @@ -36,6 +36,8 @@ DEF_RVV_FUNCTION (vsuxw, th_indexed_loadstore_width, none_m_preds, all_v_scalar_ DEF_RVV_FUNCTION (vext_x_v, th_extract, none_preds, iu_x_s_u_ops) DEF_RVV_FUNCTION (th_vlseg, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) DEF_RVV_FUNCTION (th_vlsegu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (th_vlsegff, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ops) +DEF_RVV_FUNCTION (th_vlseguff, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ops) DEF_RVV_FUNCTION (th_vsseg, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ops) DEF_RVV_FUNCTION (th_vlsseg, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_ptrdiff_ops) DEF_RVV_FUNCTION (th_vlssegu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_ptrdiff_ops) diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md index c9cedcfc08d..5409dce16ff 100644 
--- a/gcc/config/riscv/thead-vector.md +++ b/gcc/config/riscv/thead-vector.md @@ -46,6 +46,13 @@ (define_c_enum "unspec" [ UNSPEC_TH_VLXSEGHU UNSPEC_TH_VLXSEGW UNSPEC_TH_VLXSEGWU + + UNSPEC_TH_VLSEGBFF + UNSPEC_TH_VLSEGBUFF + UNSPEC_TH_VLSEGHFF + UNSPEC_TH_VLSEGHUFF + UNSPEC_TH_VLSEGWFF + UNSPEC_TH_VLSEGWUFF ]) (define_int_iterator UNSPEC_TH_VLMEM_OP [ @@ -70,6 +77,9 @@ (define_int_iterator UNSPEC_TH_VLSEGMEM_OP[ UNSPEC_TH_VLSEGB UNSPEC_TH_VLSEGBU UNSPEC_TH_VLSEGH UNSPEC_TH_VLSEGHU UNSPEC_TH_VLSEGW UNSPEC_TH_VLSEGWU + UNSPEC_TH_VLSEGBFF UNSPEC_TH_VLSEGBUFF + UNSPEC_TH_VLSEGHFF UNSPEC_TH_VLSEGHUFF + UNSPEC_TH_VLSEGWFF UNSPEC_TH_VLSEGWUFF ]) (define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[ @@ -106,6 +116,9 @@ (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLXSEGB "b") (UNSPEC_TH_VLXSEGBU "bu") (UNSPEC_TH_VLXSEGH "h") (UNSPEC_TH_VLXSEGHU "hu") (UNSPEC_TH_VLXSEGW "w") (UNSPEC_TH_VLXSEGWU "wu") + (UNSPEC_TH_VLSEGBFF "bff") (UNSPEC_TH_VLSEGBUFF "buff") + (UNSPEC_TH_VLSEGHFF "hff") (UNSPEC_TH_VLSEGHUFF "huff") + (UNSPEC_TH_VLSEGWFF "wff") (UNSPEC_TH_VLSEGWUFF "wuff") ]) (define_int_attr vlmem_order_attr [ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c new file mode 100644 index 00000000000..359f6766e6a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlsegff-vsseg.c @@ -0,0 +1,118 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** 
th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f1 (void * in, void *out) +{ + vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tu (v, in, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} + +/* +** f2: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f2 (void * in, void *out, vbool16_t mask) +{ + vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_m (mask, in, 4); + vint16m1_t v_0 = __riscv_vget_i16m1(v, 0); + vint16m1_t v_1 = __riscv_vget_i16m1(v, 1); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v_0, v2_0, 4); + vint16m1_t v3_1 = 
__riscv_vadd_vv_i16m1 (v_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} + +/* +** f3: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2hff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f3 (void * in, void *out, vbool16_t mask) +{ + vint16m1x2_t v = __riscv_th_vlseg2hff_v_i16m1x2 (in, 4); + vint16m1x2_t v2 = __riscv_th_vlseg2hff_v_i16m1x2_tumu (mask, v, in, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, v3_1, v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsseg2h_v_i16m1x2 (out, v4, 4); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c new file mode 100644 index 00000000000..95580c2a50b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlseguff-vsseg.c @@ -0,0 +1,115 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f1(void *in, void *out) { + vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tu(v, in, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} + +/* +** f2: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** 
th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f2(void *in, void *out, vbool16_t mask) { + vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_m(mask, in, 4); + vuint16m1_t v_0 = __riscv_vget_u16m1(v, 0); + vuint16m1_t v_1 = __riscv_vget_u16m1(v, 1); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1(v_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1(v_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1(v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1(v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} + +/* +** f3: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\) +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlseg2huff\.v\s+v[0-9]+,\([a-x0-9]+\),v0\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v0\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsseg2h\.v\s+v[0-9]+,\([a-x0-9]+\) +** ret +*/ +void f3(void 
*in, void *out, vbool16_t mask) { + vuint16m1x2_t v = __riscv_th_vlseg2huff_v_u16m1x2(in, 4); + vuint16m1x2_t v2 = __riscv_th_vlseg2huff_v_u16m1x2_tumu(mask, v, in, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1(v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1(v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu(mask, v3_0, v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu(mask, v3_1, v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu(mask, v4_0, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu(mask, v4_1, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset(v4, 0, v4_0); + v4 = __riscv_vset(v4, 1, v4_1); + __riscv_th_vsseg2h_v_u16m1x2(out, v4, 4); +} -- 2.47.1