From: Frank Chang <frank.ch...@sifive.com>

Separate the implementation of vfwredsum.vs and vfwredosum.vs.
Introduce the propagate-NaN feature for vfwredsum.vs, as implementations
are permitted to canonicalize the NaN and, if the NaN is signaling, set
the invalid exception flag.

Signed-off-by: Frank Chang <frank.ch...@sifive.com>
---
 target/riscv/helper.h                   |  2 +
 target/riscv/insn32.decode              |  3 +-
 target/riscv/insn_trans/trans_rvv.inc.c |  3 +-
 target/riscv/vector_helper.c            | 71 +++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 6957a98237..cfe9baa253 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1078,6 +1078,8 @@ DEF_HELPER_6(vfredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_6(vfwredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vfwredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfwredosum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vfwredosum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
 
 DEF_HELPER_6(vmand_mm, void, ptr, ptr, ptr, ptr, env, i32)
 DEF_HELPER_6(vmnand_mm, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 0fe46c10c2..e32946b1f5 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -580,7 +580,8 @@ vfredosum_vs    000011 . ..... ..... 001 ..... 1010111 @r_vm
 vfredmin_vs     000101 . ..... ..... 001 ..... 1010111 @r_vm
 vfredmax_vs     000111 . ..... ..... 001 ..... 1010111 @r_vm
 # Vector widening ordered and unordered float reduction sum
-vfwredsum_vs    1100-1 . ..... ..... 001 ..... 1010111 @r_vm
+vfwredsum_vs    110001 . ..... ..... 001 ..... 1010111 @r_vm
+vfwredosum_vs   110011 . ..... ..... 001 ..... 1010111 @r_vm
 vmand_mm        011001 - ..... ..... 010 ..... 1010111 @r
 vmnand_mm       011101 - ..... ..... 010 ..... 1010111 @r
 vmandnot_mm     011000 - ..... ..... 010 ..... 1010111 @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 37eee6cf97..10d8b8b00d 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -3016,7 +3016,8 @@ GEN_OPFVV_TRANS(vfredmax_vs, reduction_check)
 GEN_OPFVV_TRANS(vfredmin_vs, reduction_check)
 
 /* Vector Widening Floating-Point Reduction Instructions */
-GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_check)
+GEN_OPFVV_WIDEN_TRANS(vfwredsum_vs, reduction_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwredosum_vs, reduction_widen_check)
 
 /*
  *** Vector Mask Operations
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 8465aec94e..2b2b1f521f 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4805,6 +4805,51 @@ GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, false,
               float64_minnum_noprop, clearq)
 
 /* Vector Widening Floating-Point Reduction Instructions */
+/* Ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
+void HELPER(vfwredosum_vs_h)(void *vd, void *v0, void *vs1,
+                             void *vs2, CPURISCVState *env, uint32_t desc)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vl = env->vl;
+    uint32_t i;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
+    uint32_t s1 = *((uint32_t *)vs1 + H4(0));
+
+    for (i = 0; i < vl; i++) {
+        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
+        if (!vm && !vext_elem_mask(v0, i)) {
+            continue;
+        }
+        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
+                         &env->fp_status);
+    }
+    *((uint32_t *)vd + H4(0)) = s1;
+    clearl(vd, vta, 1, sizeof(uint32_t), tot);
+}
+
+void HELPER(vfwredosum_vs_w)(void *vd, void *v0, void *vs1,
+                             void *vs2, CPURISCVState *env, uint32_t desc)
+{
+    uint32_t vm = vext_vm(desc);
+    uint32_t vta = vext_vta(desc);
+    uint32_t vl = env->vl;
+    uint32_t i;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
+    uint64_t s1 = *((uint64_t *)vs1);
+
+    for (i = 0; i < vl; i++) {
+        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
+        if (!vm && !vext_elem_mask(v0, i)) {
+            continue;
+        }
+        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
+                         &env->fp_status);
+    }
+    *((uint64_t *)vd) = s1;
+    clearq(vd, vta, 1, sizeof(uint64_t), tot);
+}
+
 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
                             void *vs2, CPURISCVState *env, uint32_t desc)
@@ -4813,18 +4858,27 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
     uint32_t i;
-    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
     uint32_t s1 = *((uint32_t *)vs1 + H4(0));
+    bool active = false;
 
     for (i = 0; i < vl; i++) {
         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
         if (!vm && !vext_elem_mask(v0, i)) {
             continue;
         }
+        active = true;
         s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
                          &env->fp_status);
     }
-    *((uint32_t *)vd + H4(0)) = s1;
+
+    if (vl > 0) {
+        if (!active) {
+            *((uint32_t *)vd + H4(0)) = propagate_nan(s1, 32, &env->fp_status);
+        } else {
+            *((uint32_t *)vd + H4(0)) = s1;
+        }
+    }
     clearl(vd, vta, 1, sizeof(uint32_t), tot);
 }
 
@@ -4835,18 +4889,27 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
     uint32_t vta = vext_vta(desc);
     uint32_t vl = env->vl;
     uint32_t i;
-    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
+    uint32_t tot = env_archcpu(env)->cfg.vlen >> 3;
     uint64_t s1 = *((uint64_t *)vs1);
+    bool active = false;
 
     for (i = 0; i < vl; i++) {
         uint32_t s2 = *((uint32_t *)vs2 + H4(i));
         if (!vm && !vext_elem_mask(v0, i)) {
             continue;
         }
+        active = true;
         s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
                          &env->fp_status);
     }
-    *((uint64_t *)vd) = s1;
+
+    if (vl > 0) {
+        if (!active) {
+            *((uint64_t *)vd) = propagate_nan(s1, 64, &env->fp_status);
+        } else {
+            *((uint64_t *)vd) = s1;
+        }
+    }
     clearq(vd, vta, 1, sizeof(uint64_t), tot);
 }
-- 
2.17.1
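
Note: propagate_nan() is not defined in this patch, so it presumably comes
from an earlier patch in the series. As a reading aid only, here is a
minimal sketch of the behaviour the commit message describes (canonicalize
the NaN and, if it is signaling, set the invalid exception flag). The name
and signature are taken from the call sites above; the body is an
assumption, not the actual implementation:

/* Sketch only: assumes sew is 32 or 64, matching the call sites above. */
static uint64_t propagate_nan(uint64_t s1, uint16_t sew, float_status *s)
{
    if (sew == 32) {
        float32 f = s1;
        if (float32_is_signaling_nan(f, s)) {
            float_raise(float_flag_invalid, s);
        }
        /* Canonicalize any NaN to the default (quiet) NaN. */
        return float32_is_any_nan(f) ? float32_default_nan(s) : s1;
    } else {
        float64 f = s1;
        if (float64_is_signaling_nan(f, s)) {
            float_raise(float_flag_invalid, s);
        }
        return float64_is_any_nan(f) ? float64_default_nan(s) : s1;
    }
}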
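
Background on why the two encodings are kept separate: IEEE-754 addition is
not associative, so a strictly element-ordered sum (vfwredosum.vs) and an
implementation-defined-order sum (vfwredsum.vs) may legitimately produce
different results. A standalone illustration, independent of the QEMU code
above:

#include <stdio.h>

int main(void)
{
    float a = 1e20f, b = -1e20f, c = 1.0f;
    /* Left-to-right (ordered) evaluation: a and b cancel, c survives. */
    printf("(a + b) + c = %g\n", (a + b) + c);  /* prints 1 */
    /* Regrouped (as an unordered reduction may do): c is absorbed by b. */
    printf("a + (b + c) = %g\n", a + (b + c));  /* prints 0 */
    return 0;
}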