Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-28 Thread LIU Zhiwei



On 2020/3/29 0:13, LIU Zhiwei wrote:



On 2020/3/28 23:47, Richard Henderson wrote:

On 3/28/20 8:17 AM, LIU Zhiwei wrote:

Missed the improvement here.  See tcg_gen_mulsu2_i64.
Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much tidier.

Let A = signed operand,
 B = unsigned operand
 P = unsigned product

If the sign bit of A is set, then P is too large.
In that case we subtract 2**64 * B to fix that:

 HI_P -= (A < 0 ? B : 0)

where the conditional is computed as (A >> 63) & B.


I think I get it.

LET  A = 2 ** 64  - X

THEN

X = 2 ** 64 - A
SIGNED_P = -X * B

if (A * B == P) then

(2 ** 64  - X) * B == P
2 **64 * B - X * B == P

-X *B == P - 2**64*B

HI_P -= (A < 0 ? B :0)


The derivation above is confusing. Here is the code with a clearer explanation.

/*
 * Return the high 64 bits of the 128-bit product of a signed 64-bit
 * operand (s2) and an unsigned 64-bit operand (s1).
 *
 * Let  A  = s2 reinterpreted as an unsigned 64-bit value,
 *      B  = s1, the unsigned operand,
 *      P  = mulu64(A, B), the 128-bit unsigned product,
 *      SP = the wanted signed * unsigned product.
 *
 * IF s2 >= 0
 *      s2 == A, so
 *      SP = P
 * ELSE
 *      s2 == A - 2 ** 64 (two's complement), so
 *      SP = (A - 2 ** 64) * B
 *         = A * B - 2 ** 64 * B
 *         = P - 2 ** 64 * B
 *
 * Subtracting 2 ** 64 * B only affects the high half of P, THEN
 *      HI_P -= (s2 < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    /* s2 is converted to uint64_t here, i.e. multiplied as its bit pattern. */
    mulu64(&lo_64, &hi_64, s2, s1);

    /* Undo the 2 ** 64 * s1 excess introduced by a negative s2. */
    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

Zhiwei

Zhiwei


r~






Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-28 Thread LIU Zhiwei




On 2020/3/28 23:47, Richard Henderson wrote:

On 3/28/20 8:17 AM, LIU Zhiwei wrote:

Missed the improvement here.  See tcg_gen_mulsu2_i64.

Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much
tidier.

Let A = signed operand,
 B = unsigned operand
 P = unsigned product

If the sign bit of A is set, then P is too large.
In that case we subtract 2**64 * B to fix that:

 HI_P -= (A < 0 ? B : 0)

where the conditional is computed as (A >> 63) & B.


I think I get it.

LET  A = 2 ** 64  - X

THEN

X = 2 ** 64 - A
SIGNED_P = -X * B

if (A * B == P) then

(2 ** 64  - X) * B == P
2 **64 * B - X * B == P

-X *B == P - 2**64*B

HI_P -= (A < 0 ? B :0)

Zhiwei


r~





Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-28 Thread Richard Henderson
On 3/28/20 8:17 AM, LIU Zhiwei wrote:
>> Missed the improvement here.  See tcg_gen_mulsu2_i64.
> Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much
> tidier.

Let A = signed operand,
B = unsigned operand
P = unsigned product

If the sign bit of A is set, then P is too large.
In that case we subtract 2**64 * B to fix that:

HI_P -= (A < 0 ? B : 0)

where the conditional is computed as (A >> 63) & B.


r~



Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-28 Thread LIU Zhiwei




On 2020/3/28 8:06, Richard Henderson wrote:

On 3/17/20 8:06 AM, LIU Zhiwei wrote:

+static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
+{
+uint64_t hi_64, lo_64, abs_s2 = s2;
+
+if (s2 < 0) {
+abs_s2 = -s2;
+}
+mulu64(&lo_64, &hi_64, abs_s2, s1);
+if (s2 < 0) {
+lo_64 = ~lo_64;
+hi_64 = ~hi_64;
+if (lo_64 == UINT64_MAX) {
+lo_64 = 0;
+hi_64 += 1;
+} else {
+lo_64 += 1;
+}
+}
+
+return hi_64;
+}

Missed the improvement here.  See tcg_gen_mulsu2_i64.
Though I have not gotten the principle, the code in tcg_gen_mulsu2_i64 is much tidier.


Thanks for pointing that out.

Zhiwei

Otherwise,
Reviewed-by: Richard Henderson 


r~





Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-27 Thread Richard Henderson
On 3/17/20 8:06 AM, LIU Zhiwei wrote:
> +static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
> +{
> +uint64_t hi_64, lo_64, abs_s2 = s2;
> +
> +if (s2 < 0) {
> +abs_s2 = -s2;
> +}
> +mulu64(&lo_64, &hi_64, abs_s2, s1);
> +if (s2 < 0) {
> +lo_64 = ~lo_64;
> +hi_64 = ~hi_64;
> +if (lo_64 == UINT64_MAX) {
> +lo_64 = 0;
> +hi_64 += 1;
> +} else {
> +lo_64 += 1;
> +}
> +}
> +
> +return hi_64;
> +}

Missed the improvement here.  See tcg_gen_mulsu2_i64.

Otherwise,
Reviewed-by: Richard Henderson 


r~




Re: [PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-25 Thread Alistair Francis
On Tue, Mar 17, 2020 at 8:43 AM LIU Zhiwei  wrote:
>
> Signed-off-by: LIU Zhiwei 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/helper.h   |  33 +
>  target/riscv/insn32.decode  |   8 ++
>  target/riscv/insn_trans/trans_rvv.inc.c |  10 ++
>  target/riscv/vector_helper.c| 156 
>  4 files changed, 207 insertions(+)
>
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index c7d4ff185a..f42a12eef3 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -525,3 +525,36 @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32)
>  DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32)
>  DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32)
>  DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> +
> +DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
> +DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
> +DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index aafbdc6be7..abfed469bc 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -363,6 +363,14 @@ vmaxu_vv        000110 . ..... ..... 000 ..... 1010111 @r_vm
>  vmaxu_vx        000110 . ..... ..... 100 ..... 1010111 @r_vm
>  vmax_vv         000111 . ..... ..... 000 ..... 1010111 @r_vm
>  vmax_vx         000111 . ..... ..... 100 ..... 1010111 @r_vm
> +vmul_vv         100101 . ..... ..... 010 ..... 1010111 @r_vm
> +vmul_vx         100101 . ..... ..... 110 ..... 1010111 @r_vm
> +vmulh_vv        100111 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulh_vx        100111 . ..... ..... 110 ..... 1010111 @r_vm
> +vmulhu_vv       100100 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhu_vx       100100 . ..... ..... 110 ..... 1010111 @r_vm
> +vmulhsu_vv      100110 . ..... ..... 010 ..... 1010111 @r_vm
> +vmulhsu_vx      100110 . ..... ..... 110 ..... 1010111 @r_vm
>
>  vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
>  vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
> index 53c49ee15c..c276beabd6 100644
> --- a/target/riscv/insn_trans/trans_rvv.inc.c
> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
> @@ -1452,3 +1452,13 @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check)
>  GEN_OPIVX_TRANS(vmin_vx,  opivx_check)
>  GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
>  GEN_OPIVX_TRANS(vmax_vx,  opivx_check)
> +
> +/* Vector Single-Width Integer Multiply Instructions */
> +GEN_OPIVV_GVEC_TRANS(vmul_vv,  mul)
> +GEN_OPIVV_TRANS(vmulh_vv, opivv_check)
> +GEN_OPIVV_TRANS(vmulhu_vv, opivv_check)
> +GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check)
> +GEN_OPIVX_GVEC_TRANS(vmul_vx,  muls)
> +GEN_OPIVX_TRANS(vmulh_vx, opivx_check)
> +GEN_OPIVX_TRANS(vmulhu_vx, opivx_check)
> +GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check)
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 32c2760a8a..56ba9a7422 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> 

[PATCH v6 18/61] target/riscv: vector single-width integer multiply instructions

2020-03-17 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
---
 target/riscv/helper.h   |  33 +
 target/riscv/insn32.decode  |   8 ++
 target/riscv/insn_trans/trans_rvv.inc.c |  10 ++
 target/riscv/vector_helper.c| 156 
 4 files changed, 207 insertions(+)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index c7d4ff185a..f42a12eef3 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -525,3 +525,36 @@ DEF_HELPER_6(vmax_vx_b, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vmax_vx_h, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vmax_vx_w, void, ptr, ptr, tl, ptr, env, i32)
 DEF_HELPER_6(vmax_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+
+DEF_HELPER_6(vmul_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmul_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmul_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmul_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulh_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulh_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulh_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulh_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vv_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vv_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vv_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vv_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(vmul_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmul_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmul_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmul_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulh_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulh_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulh_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulh_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vx_b, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vx_h, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vx_w, void, ptr, ptr, tl, ptr, env, i32)
+DEF_HELPER_6(vmulhsu_vx_d, void, ptr, ptr, tl, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index aafbdc6be7..abfed469bc 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -363,6 +363,14 @@ vmaxu_vv        000110 . ..... ..... 000 ..... 1010111 @r_vm
 vmaxu_vx        000110 . ..... ..... 100 ..... 1010111 @r_vm
 vmax_vv         000111 . ..... ..... 000 ..... 1010111 @r_vm
 vmax_vx         000111 . ..... ..... 100 ..... 1010111 @r_vm
+vmul_vv         100101 . ..... ..... 010 ..... 1010111 @r_vm
+vmul_vx         100101 . ..... ..... 110 ..... 1010111 @r_vm
+vmulh_vv        100111 . ..... ..... 010 ..... 1010111 @r_vm
+vmulh_vx        100111 . ..... ..... 110 ..... 1010111 @r_vm
+vmulhu_vv       100100 . ..... ..... 010 ..... 1010111 @r_vm
+vmulhu_vx       100100 . ..... ..... 110 ..... 1010111 @r_vm
+vmulhsu_vv      100110 . ..... ..... 010 ..... 1010111 @r_vm
+vmulhsu_vx      100110 . ..... ..... 110 ..... 1010111 @r_vm
 
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 53c49ee15c..c276beabd6 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -1452,3 +1452,13 @@ GEN_OPIVX_TRANS(vminu_vx, opivx_check)
 GEN_OPIVX_TRANS(vmin_vx,  opivx_check)
 GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
 GEN_OPIVX_TRANS(vmax_vx,  opivx_check)
+
+/* Vector Single-Width Integer Multiply Instructions */
+GEN_OPIVV_GVEC_TRANS(vmul_vv,  mul)
+GEN_OPIVV_TRANS(vmulh_vv, opivv_check)
+GEN_OPIVV_TRANS(vmulhu_vv, opivv_check)
+GEN_OPIVV_TRANS(vmulhsu_vv, opivv_check)
+GEN_OPIVX_GVEC_TRANS(vmul_vx,  muls)
+GEN_OPIVX_TRANS(vmulh_vx, opivx_check)
+GEN_OPIVX_TRANS(vmulhu_vx, opivx_check)
+GEN_OPIVX_TRANS(vmulhsu_vx, opivx_check)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 32c2760a8a..56ba9a7422 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -852,6 +852,10 @@ GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t,