Re: [PATCH 11/31] target/arm: Implement SVE2 integer add/subtract long

2020-04-13 Thread Laurent Desnogues
On Fri, Mar 27, 2020 at 12:09 AM Richard Henderson wrote:
[...]
> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
> index bee00eaa44..7d7a59f620 100644
> --- a/target/arm/sve_helper.c
> +++ b/target/arm/sve_helper.c
> @@ -1088,6 +1088,49 @@ DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
>  #undef DO_ZPZ
>  #undef DO_ZPZ_D
>
> +/*
> + * Three-operand expander, unpredicated, in which the two inputs are
> + * selected from the top or bottom half of the wide column.
> + */
> +#define DO_ZZZ_TB(NAME, TYPE, TYPEN, OP) \
> +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
> +{  \
> +intptr_t i, opr_sz = simd_oprsz(desc); \
> +int sel1 = (simd_data(desc) & 1) * sizeof(TYPE);   \
> +int sel2 = (simd_data(desc) & 2) * (sizeof(TYPE) / 2); \
> +for (i = 0; i < opr_sz; i += sizeof(TYPE)) {   \
> +TYPE nn = (TYPEN)(*(TYPE *)(vn + i) >> sel1);  \
> +TYPE mm = (TYPEN)(*(TYPE *)(vm + i) >> sel2);  \
> +*(TYPE *)(vd + i) = OP(nn, mm);\
> +}  \
> +}

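For sel1/sel2 the multiplicand should be the number of bits in TYPEN,
because both values are used as right-shift counts. As written they are
byte counts: for the _h helpers (TYPE = int16_t, TYPEN = int8_t) both
sel1 and sel2 evaluate to 2 when the top half is requested, whereas a
shift of 8 is needed to reach it.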

Laurent



[PATCH 11/31] target/arm: Implement SVE2 integer add/subtract long

2020-03-26 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sve.h| 24 
 target/arm/sve.decode  | 19 
 target/arm/sve_helper.c| 43 +++
 target/arm/translate-sve.c | 46 ++
 4 files changed, 132 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 368185944a..475fce7f3a 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1372,6 +1372,30 @@ DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(sve2_saddl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) /* args: vd, vn, vm, desc; int8_t inputs, int16_t result */
+DEF_HELPER_FLAGS_4(sve2_saddl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) /* int16_t inputs, int32_t result */
+DEF_HELPER_FLAGS_4(sve2_saddl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) /* int32_t inputs, int64_t result */
+
+DEF_HELPER_FLAGS_4(sve2_ssubl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) /* int8_t inputs, int16_t result */
+DEF_HELPER_FLAGS_4(sve2_ssubl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ssubl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sabdl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sabdl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sabdl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_uaddl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_usubl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_usubl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_usubl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_uabdl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uabdl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uabdl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(sve_ldr, TCG_CALL_NO_WG, void, env, ptr, tl, int)
 DEF_HELPER_FLAGS_4(sve_str, TCG_CALL_NO_WG, void, env, ptr, tl, int)
 
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 86aee38668..a239fd3479 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1161,3 +1161,22 @@ SUQADD          01000100 .. 011 100 100 ... ..... .....  @rdn_pg_rm
 USQADD          01000100 .. 011 101 100 ... ..... .....  @rdn_pg_rm
 SQSUB_zpzz      01000100 .. 011 110 100 ... ..... .....  @rdm_pg_rn # SQSUBR
 UQSUB_zpzz      01000100 .. 011 111 100 ... ..... .....  @rdm_pg_rn # UQSUBR
+
+#### SVE2 Widening Integer Arithmetic
+
+## SVE2 integer add/subtract long
+
+SADDLB          01000101 .. 0 ..... 00 0000 ..... .....  @rd_rn_rm
+SADDLT          01000101 .. 0 ..... 00 0001 ..... .....  @rd_rn_rm
+UADDLB          01000101 .. 0 ..... 00 0010 ..... .....  @rd_rn_rm
+UADDLT          01000101 .. 0 ..... 00 0011 ..... .....  @rd_rn_rm
+
+SSUBLB          01000101 .. 0 ..... 00 0100 ..... .....  @rd_rn_rm
+SSUBLT          01000101 .. 0 ..... 00 0101 ..... .....  @rd_rn_rm
+USUBLB          01000101 .. 0 ..... 00 0110 ..... .....  @rd_rn_rm
+USUBLT          01000101 .. 0 ..... 00 0111 ..... .....  @rd_rn_rm
+
+SABDLB          01000101 .. 0 ..... 00 1100 ..... .....  @rd_rn_rm
+SABDLT          01000101 .. 0 ..... 00 1101 ..... .....  @rd_rn_rm
+UABDLB          01000101 .. 0 ..... 00 1110 ..... .....  @rd_rn_rm
+UABDLT          01000101 .. 0 ..... 00 1111 ..... .....  @rd_rn_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index bee00eaa44..7d7a59f620 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1088,6 +1088,49 @@ DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
 #undef DO_ZPZ
 #undef DO_ZPZ_D
 
+/*
+ * Three-operand expander, unpredicated, in which the two inputs are
+ * selected from the top or bottom half of the wide column.
+ *
+ *   NAME  - helper name, passed to HELPER().
+ *   TYPE  - wide element type; vn, vm and vd are all walked in steps of
+ *           sizeof(TYPE) over simd_oprsz(desc) bytes.
+ *   TYPEN - narrow element type, half the width of TYPE; each selected
+ *           input is truncated to TYPEN and converted back to TYPE, so
+ *           the signedness of TYPEN decides sign- or zero-extension.
+ *   OP    - binary operation applied to the two widened inputs.
+ *
+ * Bit 0 of simd_data(desc) selects the top half of each vn element and
+ * bit 1 selects the top half of each vm element; a clear bit selects the
+ * bottom half.  sel1/sel2 are right-shift counts, so they must be
+ * expressed in bits of TYPEN (0 or 8 * sizeof(TYPEN)), not in bytes.
+ *
+ * NOTE(review): the wide elements are read at raw offset i, without a
+ * host-endian index adjustment such as the H1_4 passed to DO_ZZW above;
+ * confirm this is correct on big-endian hosts.
+ */
+#define DO_ZZZ_TB(NAME, TYPE, TYPEN, OP)                                \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc)          \
+{                                                                       \
+    intptr_t i, opr_sz = simd_oprsz(desc);                              \
+    int sel1 = (simd_data(desc) & 1) * sizeof(TYPEN) * 8;               \
+    int sel2 = ((simd_data(desc) >> 1) & 1) * sizeof(TYPEN) * 8;        \
+    for (i = 0; i < opr_sz; i += sizeof(TYPE)) {                        \
+        TYPE nn = (TYPEN)(*(TYPE *)(vn + i) >> sel1);                   \
+        TYPE mm = (TYPEN)(*(TYPE *)(vm + i) >> sel2);                   \
+        *(TYPE *)(vd + i) = OP(nn, mm);                                 \
+    }                                                                   \
+}
+
+DO_ZZZ_TB(sve2_saddl_h, int16_t, int8_t, DO_ADD) /* int8_t halves of vn/vm, int16_t result */
+DO_ZZZ_TB(sve2_saddl_s, int32_t, int16_t, DO_ADD) /* int16_t halves of vn/vm, int32_t result */
+DO_ZZZ_TB(sve2_saddl_d, int64_t, int32_t, DO_ADD) /* int32_t halves of vn/vm, int64_t result */
+
+DO_ZZZ_TB(sve2_ssubl_h, int16_t, int8_t, DO_SUB) /* int8_t halves of vn/vm, int16_t result */