Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
On 2020/3/29 0:13, LIU Zhiwei wrote: On 2020/3/28 23:47, Richard Henderson wrote: On 3/28/20 8:17 AM, LIU Zhiwei wrote: Missed the improvement here. See tcg_gen_mulsu2_i64. Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much tidier. Let A = signed operand, B = unsigned operand P = unsigned product If the sign bit A is set, then P is too large. In that case we subtract 2**64 * B to fix that: HI_P -= (A < 0 ? B : 0) where the conditional is computed as (A >> 63) & B. I think I get it. LET A = 2 ** 64 - X THEN X = 2 ** 64 - A SIGNED_P = -X * B if (A * B == P) then (2 ** 64 - X) * B == P 2 **64 * B - X * B == P -X *B == P - 2**64*B HI_P -= (A < 0 ? B :0) It's confusing here. I paste the clearer code. /* * Let A = signed operand, * B = unsigned operand * P = mulu64(A, B), unsigned product * * LET X = 2 ** 64 - A, 2's complement of A * SP = signed product * THEN * IF A < 0 * SP = -X * B * = -(2 ** 64 - A) * B * = A * B - 2 ** 64 * B * = P - 2 ** 64 * B * ELSE * SP = P * THEN * HI_P -= (A < 0 ? B : 0) */ static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) { uint64_t hi_64, lo_64; mulu64(&lo_64, &hi_64, s2, s1); hi_64 -= s2 < 0 ? s1 : 0; return hi_64; } Zhiwei Zhiwei r~
Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
On 2020/3/28 23:47, Richard Henderson wrote: On 3/28/20 8:17 AM, LIU Zhiwei wrote: Missed the improvement here. See tcg_gen_mulsu2_i64. Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much tidier. Let A = signed operand, B = unsigned operand P = unsigned product If the sign bit A is set, then P is too large. In that case we subtract 2**64 * B to fix that: HI_P -= (A < 0 ? B : 0) where the conditional is computed as (A >> 63) & B. I think I get it. LET A = 2 ** 64 - X THEN X = 2 ** 64 - A SIGNED_P = -X * B if (A * B == P) then (2 ** 64 - X) * B == P 2 **64 * B - X * B == P -X *B == P - 2**64*B HI_P -= (A < 0 ? B :0) Zhiwei r~
Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
On 3/28/20 8:17 AM, LIU Zhiwei wrote: >> Missed the improvement here. See tcg_gen_mulsu2_i64. > Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much > tidier. Let A = signed operand, B = unsigned operand P = unsigned product If the sign bit A is set, then P is too large. In that case we subtract 2**64 * B to fix that: HI_P -= (A < 0 ? B : 0) where the conditional is computed as (A >> 63) & B. r~
Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
On 2020/3/28 8:06, Richard Henderson wrote: On 3/17/20 8:06 AM, LIU Zhiwei wrote: +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) +{ +uint64_t hi_64, lo_64, abs_s2 = s2; + +if (s2 < 0) { +abs_s2 = -s2; +} +mulu64(&lo_64, &hi_64, abs_s2, s1); +if (s2 < 0) { +lo_64 = ~lo_64; +hi_64 = ~hi_64; +if (lo_64 == UINT64_MAX) { +lo_64 = 0; +hi_64 += 1; +} else { +lo_64 += 1; +} +} + +return hi_64; +} Missed the improvement here. See tcg_gen_mulsu2_i64. Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much tidier. Thanks for pointing that. Zhiwei Otherwise, Reviewed-by: Richard Henderson r~
Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
On 3/17/20 8:06 AM, LIU Zhiwei wrote: > +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) > +{ > +uint64_t hi_64, lo_64, abs_s2 = s2; > + > +if (s2 < 0) { > +abs_s2 = -s2; > +} > +mulu64(&lo_64, &hi_64, abs_s2, s1); > +if (s2 < 0) { > +lo_64 = ~lo_64; > +hi_64 = ~hi_64; > +if (lo_64 == UINT64_MAX) { > +lo_64 = 0; > +hi_64 += 1; > +} else { > +lo_64 += 1; > +} > +} > + > +return hi_64; > +} Missed the improvement here. See tcg_gen_mulsu2_i64. Otherwise, Reviewed-by: Richard Henderson r~
Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
On Tue, Mar 17, 2020 at 8:43 AM LIU Zhiwei wrote: > > Signed-off-by: LIU Zhiwei Reviewed-by: Alistair Francis Alistair > --- > target/riscv/helper.h | 33 + > target/riscv/insn32.decode | 8 ++ > target/riscv/insn_trans/trans_rvv.inc.c | 10 ++ > target/riscv/vector_helper.c| 156 > 4 files changed, 207 insertions(+) > > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index c7d4ff185a..f42a12eef3 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -525,3 +525,36 @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, > i32) > DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) > DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) > + > +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) > +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmul_vx_d, void, 
ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) > +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) > diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode > index aafbdc6be7..abfed469bc 100644 > --- a/target/riscv/insn32.decode > +++ b/target/riscv/insn32.decode > @@ -363,6 +363,14 @@ vmaxu_vv000110 . . . 000 . 1010111 > @r_vm > vmaxu_vx000110 . . . 100 . 1010111 @r_vm > vmax_vv 000111 . . . 000 . 1010111 @r_vm > vmax_vx 000111 . . . 100 . 1010111 @r_vm > +vmul_vv 100101 . . . 010 . 1010111 @r_vm > +vmul_vx 100101 . . . 110 . 1010111 @r_vm > +vmulh_vv100111 . . . 010 . 1010111 @r_vm > +vmulh_vx100111 . . . 110 . 1010111 @r_vm > +vmulhu_vv 100100 . . . 010 . 1010111 @r_vm > +vmulhu_vx 100100 . . . 110 . 1010111 @r_vm > +vmulhsu_vv 100110 . . . 010 . 1010111 @r_vm > +vmulhsu_vx 100110 . . . 110 . 1010111 @r_vm > > vsetvli 0 ... . 111 . 1010111 @r2_zimm > vsetvl 100 . . 111 . 
1010111 @r > diff --git a/target/riscv/insn_trans/trans_rvv.inc.c > b/target/riscv/insn_trans/trans_rvv.inc.c > index 53c49ee15c..c276beabd6 100644 > --- a/target/riscv/insn_trans/trans_rvv.inc.c > +++ b/target/riscv/insn_trans/trans_rvv.inc.c > @@ -1452,3 +1452,13 @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check) > GEN_OPIVX_TRANS(vmin_vx, opivx_check) > GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) > GEN_OPIVX_TRANS(vmax_vx, opivx_check) > + > +/* Vector Single-Width Integer Multiply Instructions */ > +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) > +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) > +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) > +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) > +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) > +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) > +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) > +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 32c2760a8a..56ba9a7422 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c >
[PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions
Signed-off-by: LIU Zhiwei --- target/riscv/helper.h | 33 + target/riscv/insn32.decode | 8 ++ target/riscv/insn_trans/trans_rvv.inc.c | 10 ++ target/riscv/vector_helper.c| 156 4 files changed, 207 insertions(+) diff --git a/target/riscv/helper.h b/target/riscv/helper.h index c7d4ff185a..f42a12eef3 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -525,3 +525,36 @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32) 
+DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index aafbdc6be7..abfed469bc 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -363,6 +363,14 @@ vmaxu_vv000110 . . . 000 . 1010111 @r_vm vmaxu_vx000110 . . . 100 . 1010111 @r_vm vmax_vv 000111 . . . 000 . 1010111 @r_vm vmax_vx 000111 . . . 100 . 1010111 @r_vm +vmul_vv 100101 . . . 010 . 1010111 @r_vm +vmul_vx 100101 . . . 110 . 1010111 @r_vm +vmulh_vv100111 . . . 010 . 1010111 @r_vm +vmulh_vx100111 . . . 110 . 1010111 @r_vm +vmulhu_vv 100100 . . . 010 . 1010111 @r_vm +vmulhu_vx 100100 . . . 110 . 1010111 @r_vm +vmulhsu_vv 100110 . . . 010 . 1010111 @r_vm +vmulhsu_vx 100110 . . . 110 . 1010111 @r_vm vsetvli 0 ... . 111 . 1010111 @r2_zimm vsetvl 100 . . 111 . 
1010111 @r diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c index 53c49ee15c..c276beabd6 100644 --- a/target/riscv/insn_trans/trans_rvv.inc.c +++ b/target/riscv/insn_trans/trans_rvv.inc.c @@ -1452,3 +1452,13 @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check) GEN_OPIVX_TRANS(vmin_vx, opivx_check) GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) GEN_OPIVX_TRANS(vmax_vx, opivx_check) + +/* Vector Single-Width Integer Multiply Instructions */ +GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) +GEN_OPIVV_TRANS(vmulh_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check) +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check) +GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) +GEN_OPIVX_TRANS(vmulh_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check) +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 32c2760a8a..56ba9a7422 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -852,6 +852,10 @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl) #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t,