These instructions have the same functionality as their RVV1.0 counterparts. Overall, the only differences are the general ones between XTheadVector and RVV1.0.
Signed-off-by: Huang Tao <eric.hu...@linux.alibaba.com> --- target/riscv/helper.h | 17 +++++++++ .../riscv/insn_trans/trans_xtheadvector.c.inc | 18 +++++---- target/riscv/vector_helper.c | 16 ++++---- target/riscv/vector_internals.h | 9 +++++ target/riscv/xtheadvector_helper.c | 38 +++++++++++++++++++ 5 files changed, 82 insertions(+), 16 deletions(-) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 88e3a18e17..12b5e4573a 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -2099,3 +2099,20 @@ DEF_HELPER_6(th_vfmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(th_vfnmsub_vf_h, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(th_vfnmsub_vf_w, void, ptr, ptr, i64, ptr, env, i32) DEF_HELPER_6(th_vfnmsub_vf_d, void, ptr, ptr, i64, ptr, env, i32) + +DEF_HELPER_6(th_vfwmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwnmacc_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwnmacc_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwnmsac_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwnmsac_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(th_vfwmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwnmacc_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwnmacc_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwnmsac_vf_h, void, ptr, ptr, i64, ptr, env, i32) +DEF_HELPER_6(th_vfwnmsac_vf_w, void, ptr, ptr, i64, ptr, env, i32) diff --git a/target/riscv/insn_trans/trans_xtheadvector.c.inc b/target/riscv/insn_trans/trans_xtheadvector.c.inc index 
af512c489b..7220b7d607 100644 --- a/target/riscv/insn_trans/trans_xtheadvector.c.inc +++ b/target/riscv/insn_trans/trans_xtheadvector.c.inc @@ -2037,20 +2037,22 @@ GEN_OPFVF_TRANS_TH(th_vfnmadd_vf, opfvf_check_th) GEN_OPFVF_TRANS_TH(th_vfmsub_vf, opfvf_check_th) GEN_OPFVF_TRANS_TH(th_vfnmsub_vf, opfvf_check_th) +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ +GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmacc_vv, opfvv_widen_check_th) +GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmacc_vv, opfvv_widen_check_th) +GEN_OPFVV_WIDEN_TRANS_TH(th_vfwmsac_vv, opfvv_widen_check_th) +GEN_OPFVV_WIDEN_TRANS_TH(th_vfwnmsac_vv, opfvv_widen_check_th) +GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmacc_vf) +GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmacc_vf) +GEN_OPFVF_WIDEN_TRANS_TH(th_vfwmsac_vf) +GEN_OPFVF_WIDEN_TRANS_TH(th_vfwnmsac_vf) + #define TH_TRANS_STUB(NAME) \ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ { \ return require_xtheadvector(s); \ } -TH_TRANS_STUB(th_vfwmacc_vv) -TH_TRANS_STUB(th_vfwmacc_vf) -TH_TRANS_STUB(th_vfwnmacc_vv) -TH_TRANS_STUB(th_vfwnmacc_vf) -TH_TRANS_STUB(th_vfwmsac_vv) -TH_TRANS_STUB(th_vfwmsac_vf) -TH_TRANS_STUB(th_vfwnmsac_vv) -TH_TRANS_STUB(th_vfwnmsac_vf) TH_TRANS_STUB(th_vfsqrt_v) TH_TRANS_STUB(th_vfmin_vv) TH_TRANS_STUB(th_vfmin_vf) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 165221e08b..ef89794bdd 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -3332,13 +3332,13 @@ GEN_VEXT_VF(vfnmsub_vf_w, 4) GEN_VEXT_VF(vfnmsub_vf_d, 8) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ -static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), float16_to_float32(b, true, s), d, 0, s); } -static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return 
float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), d, 0, s); @@ -3364,7 +3364,7 @@ GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) GEN_VEXT_VF(vfwmaccbf16_vf, 4) -static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), float16_to_float32(b, true, s), d, @@ -3372,7 +3372,7 @@ static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) s); } -static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), d, float_muladd_negate_c | @@ -3388,14 +3388,14 @@ RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) GEN_VEXT_VF(vfwnmacc_vf_h, 4) GEN_VEXT_VF(vfwnmacc_vf_w, 8) -static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), float16_to_float32(b, true, s), d, float_muladd_negate_c, s); } -static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), d, @@ -3411,14 +3411,14 @@ RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) GEN_VEXT_VF(vfwmsac_vf_h, 4) GEN_VEXT_VF(vfwmsac_vf_w, 8) -static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) +uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) { return float32_muladd(float16_to_float32(a, true, s), float16_to_float32(b, true, s), d, float_muladd_negate_product, s); } -static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) +uint64_t 
fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) { return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), d, diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h index 5733640e0d..535d31007d 100644 --- a/target/riscv/vector_internals.h +++ b/target/riscv/vector_internals.h @@ -385,4 +385,13 @@ uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s); uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s); uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s); +uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s); +uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s); +uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s); +uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s); +uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s); +uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s); +uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s); +uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s); + #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ diff --git a/target/riscv/xtheadvector_helper.c b/target/riscv/xtheadvector_helper.c index 1d2da6ffb7..ac8e576c49 100644 --- a/target/riscv/xtheadvector_helper.c +++ b/target/riscv/xtheadvector_helper.c @@ -2904,3 +2904,41 @@ THCALL(TH_OPFVF3, th_vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) GEN_TH_VF(th_vfnmsub_vf_h, 2, 2, clearh_th) GEN_TH_VF(th_vfnmsub_vf_w, 4, 4, clearl_th) GEN_TH_VF(th_vfnmsub_vf_d, 8, 8, clearq_th) + +/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ + +THCALL(TH_OPFVV3, th_vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) +THCALL(TH_OPFVV3, th_vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) +GEN_TH_VV_ENV(th_vfwmacc_vv_h, 2, 4, clearl_th) +GEN_TH_VV_ENV(th_vfwmacc_vv_w, 4, 8, clearq_th) +THCALL(TH_OPFVF3, th_vfwmacc_vf_h, WOP_UUU_H, H4, H2, 
fwmacc16) +THCALL(TH_OPFVF3, th_vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) +GEN_TH_VF(th_vfwmacc_vf_h, 2, 4, clearl_th) +GEN_TH_VF(th_vfwmacc_vf_w, 4, 8, clearq_th) + +THCALL(TH_OPFVV3, th_vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) +THCALL(TH_OPFVV3, th_vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) +GEN_TH_VV_ENV(th_vfwnmacc_vv_h, 2, 4, clearl_th) +GEN_TH_VV_ENV(th_vfwnmacc_vv_w, 4, 8, clearq_th) +THCALL(TH_OPFVF3, th_vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) +THCALL(TH_OPFVF3, th_vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) +GEN_TH_VF(th_vfwnmacc_vf_h, 2, 4, clearl_th) +GEN_TH_VF(th_vfwnmacc_vf_w, 4, 8, clearq_th) + +THCALL(TH_OPFVV3, th_vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) +THCALL(TH_OPFVV3, th_vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) +GEN_TH_VV_ENV(th_vfwmsac_vv_h, 2, 4, clearl_th) +GEN_TH_VV_ENV(th_vfwmsac_vv_w, 4, 8, clearq_th) +THCALL(TH_OPFVF3, th_vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) +THCALL(TH_OPFVF3, th_vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) +GEN_TH_VF(th_vfwmsac_vf_h, 2, 4, clearl_th) +GEN_TH_VF(th_vfwmsac_vf_w, 4, 8, clearq_th) + +THCALL(TH_OPFVV3, th_vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) +THCALL(TH_OPFVV3, th_vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) +GEN_TH_VV_ENV(th_vfwnmsac_vv_h, 2, 4, clearl_th) +GEN_TH_VV_ENV(th_vfwnmsac_vv_w, 4, 8, clearq_th) +THCALL(TH_OPFVF3, th_vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) +THCALL(TH_OPFVF3, th_vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) +GEN_TH_VF(th_vfwnmsac_vf_h, 2, 4, clearl_th) +GEN_TH_VF(th_vfwnmsac_vf_w, 4, 8, clearq_th) -- 2.44.0