riscv: Add widening floating-point fused mul-add instructions for XTheadVector

Huang Tao Fri, 12 Apr 2024 02:18:07 -0700

These instructions have the same function as their RVV1.0 counterparts.
Overall, there are only general differences between XTheadVector and RVV1.0.


Signed-off-by: Huang Tao <eric.hu...@linux.alibaba.com>
---
 target/riscv/helper.h                         | 17 +++++++++
 .../riscv/insn_trans/trans_xtheadvector.c.inc | 18 +++++----
 target/riscv/vector_helper.c                  | 16 ++++----
 target/riscv/vector_internals.h               |  9 +++++
 target/riscv/xtheadvector_helper.c            | 38 +++++++++++++++++++
 5 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 88e3a18e17..12b5e4573a 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -2099,3 +2099,20 @@ DEF_HELPER_6(th_vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
 DEF_HELPER_6(th_vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32)
 DEF_HELPER_6(th_vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32)
 DEF_HELPER_6(th_vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32)
+
+DEF_HELPER_6(th_vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(th_vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_xtheadvector.c.inc b/target/riscv/insn_trans/trans_xtheadvector.c.inc
index af512c489b..7220b7d607 100644
--- a/target/riscv/insn_trans/trans_xtheadvector.c.inc
+++ b/target/riscv/insn_trans/trans_xtheadvector.c.inc
@@ -2037,20 +2037,22 @@ GEN_OPFVF_TRANS_TH(th_vfnmadd_vf, opfvf_check_th)
 GEN_OPFVF_TRANS_TH(th_vfmsub_vf, opfvf_check_th)
 GEN_OPFVF_TRANS_TH(th_vfnmsub_vf, opfvf_check_th)
 
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmacc_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmacc_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmsac_vv, opfvv_widen_check_th)
+GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmsac_vv, opfvv_widen_check_th)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmacc_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmacc_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmsac_vf)
+GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmsac_vf)
+
 #define TH_TRANS_STUB(NAME)                                \
 static bool trans_##NAME(DisasContext *s, arg_##NAME *a)   \
 {                                                          \
     return require_xtheadvector(s);                        \
 }
 
-TH_TRANS_STUB(th_vfwmacc_vv)
-TH_TRANS_STUB(th_vfwmacc_vf)
-TH_TRANS_STUB(th_vfwnmacc_vv)
-TH_TRANS_STUB(th_vfwnmacc_vf)
-TH_TRANS_STUB(th_vfwmsac_vv)
-TH_TRANS_STUB(th_vfwmsac_vf)
-TH_TRANS_STUB(th_vfwnmsac_vv)
-TH_TRANS_STUB(th_vfwnmsac_vf)
 TH_TRANS_STUB(th_vfsqrt_v)
 TH_TRANS_STUB(th_vfmin_vv)
 TH_TRANS_STUB(th_vfmin_vf)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 165221e08b..ef89794bdd 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -3332,13 +3332,13 @@ GEN_VEXT_VF(vfnmsub_vf_w, 4)
 GEN_VEXT_VF(vfnmsub_vf_d, 8)
 
 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
-static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
     return float32_muladd(float16_to_float32(a, true, s),
                           float16_to_float32(b, true, s), d, 0, s);
 }
 
-static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
 {
     return float64_muladd(float32_to_float64(a, s),
                           float32_to_float64(b, s), d, 0, s);
@@ -3364,7 +3364,7 @@ GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
 
-static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
     return float32_muladd(float16_to_float32(a, true, s),
                           float16_to_float32(b, true, s), d,
@@ -3372,7 +3372,7 @@ static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
                           s);
 }
 
-static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
 {
     return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
                           d, float_muladd_negate_c |
@@ -3388,14 +3388,14 @@ RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
 
-static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
     return float32_muladd(float16_to_float32(a, true, s),
                           float16_to_float32(b, true, s), d,
                           float_muladd_negate_c, s);
 }
 
-static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
 {
     return float64_muladd(float32_to_float64(a, s),
                           float32_to_float64(b, s), d,
@@ -3411,14 +3411,14 @@ RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
 GEN_VEXT_VF(vfwmsac_vf_h, 4)
 GEN_VEXT_VF(vfwmsac_vf_w, 8)
 
-static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
+uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
 {
     return float32_muladd(float16_to_float32(a, true, s),
                           float16_to_float32(b, true, s), d,
                           float_muladd_negate_product, s);
 }
 
-static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
+uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
 {
     return float64_muladd(float32_to_float64(a, s),
                           float32_to_float64(b, s), d,
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
index 5733640e0d..535d31007d 100644
--- a/target/riscv/vector_internals.h
+++ b/target/riscv/vector_internals.h
@@ -385,4 +385,13 @@ uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s);
 uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s);
 uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s);
 
+uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s);
+uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s);
+
 #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
diff --git a/target/riscv/xtheadvector_helper.c b/target/riscv/xtheadvector_helper.c
index 1d2da6ffb7..ac8e576c49 100644
--- a/target/riscv/xtheadvector_helper.c
+++ b/target/riscv/xtheadvector_helper.c
@@ -2904,3 +2904,41 @@ THCALL(TH_OPFVF3, th_vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
 GEN_TH_VF(th_vfnmsub_vf_h, 2, 2, clearh_th)
 GEN_TH_VF(th_vfnmsub_vf_w, 4, 4, clearl_th)
 GEN_TH_VF(th_vfnmsub_vf_d, 8, 8, clearq_th)
+
+/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
+
+THCALL(TH_OPFVV3, th_vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
+THCALL(TH_OPFVV3, th_vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
+GEN_TH_VV_ENV(th_vfwmacc_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwmacc_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
+THCALL(TH_OPFVF3, th_vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
+GEN_TH_VF(th_vfwmacc_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwmacc_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
+THCALL(TH_OPFVV3, th_vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
+GEN_TH_VV_ENV(th_vfwnmacc_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwnmacc_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
+THCALL(TH_OPFVF3, th_vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
+GEN_TH_VF(th_vfwnmacc_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwnmacc_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
+THCALL(TH_OPFVV3, th_vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
+GEN_TH_VV_ENV(th_vfwmsac_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwmsac_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
+THCALL(TH_OPFVF3, th_vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
+GEN_TH_VF(th_vfwmsac_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwmsac_vf_w, 4, 8, clearq_th)
+
+THCALL(TH_OPFVV3, th_vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
+THCALL(TH_OPFVV3, th_vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
+GEN_TH_VV_ENV(th_vfwnmsac_vv_h, 2, 4, clearl_th)
+GEN_TH_VV_ENV(th_vfwnmsac_vv_w, 4, 8, clearq_th)
+THCALL(TH_OPFVF3, th_vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
+THCALL(TH_OPFVF3, th_vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
+GEN_TH_VF(th_vfwnmsac_vf_h, 2, 4, clearl_th)
+GEN_TH_VF(th_vfwnmsac_vf_w, 4, 8, clearq_th)
-- 
2.44.0

[PATCH 41/65] target/riscv: Add widening floating-point fused mul-add instructions for XTheadVector

Reply via email to