Re: [PATCH 21/57] target/arm: Convert FADDP to decodetree

2024-05-23 Thread Peter Maydell
On Mon, 6 May 2024 at 02:07, Richard Henderson wrote:
>
> This fixes a bug in which scalar half-precision did not
> diagnose sz == 1 as UNDEFINED.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/helper.h            |  4 ++
>  target/arm/tcg/a64.decode      | 12 +
>  target/arm/tcg/translate-a64.c | 87 ++
>  target/arm/tcg/vec_helper.c    | 23 +
>  4 files changed, 105 insertions(+), 21 deletions(-)

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH 21/57] target/arm: Convert FADDP to decodetree

2024-05-05 Thread Richard Henderson
This fixes a bug in which scalar half-precision did not
diagnose sz == 1 as UNDEFINED.

Signed-off-by: Richard Henderson 
---
 target/arm/helper.h            |  4 ++
 target/arm/tcg/a64.decode      | 12 +
 target/arm/tcg/translate-a64.c | 87 ++
 target/arm/tcg/vec_helper.c    | 23 +
 4 files changed, 105 insertions(+), 21 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index ff6e3094f4..8441b49d1f 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1048,6 +1048,10 @@ DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "tcg/helper-a64.h"
 #include "tcg/helper-sve.h"
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 8d0a6a147e..9d28cb5894 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -29,6 +29,7 @@
 &ri             rd imm
 &rri_sf         rd rn imm sf
 &i              imm
+&rr_e           rd rn esz
 &rrr_e          rd rn rm esz
 &rrx_e          rd rn rm idx esz
 &qrr_e          q rd rn esz
@@ -36,6 +37,9 @@
 &qrrx_e         q rd rn rm idx esz
 &qrrrr_e        q rd rn rm ra esz
 
+@rr_h           ........ ... ..... ...... rn:5 rd:5     &rr_e esz=1
+@rr_sd          ........ ... ..... ...... rn:5 rd:5     &rr_e esz=%esz_sd
+
 @rrr_h          ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=1
 @rrr_sd         ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=%esz_sd
 @rrr_hsd        ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=%esz_hsd
@@ -737,6 +741,11 @@ FRECPS_s        0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
 FRSQRTS_s       0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
 FRSQRTS_s       0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
 
+### Advanced SIMD scalar pairwise
+
+FADDP_s         0101 1110 0011 0000 1101 10 ..... ..... @rr_h
+FADDP_s         0111 1110 0.11 0000 1101 10 ..... ..... @rr_sd
+
 ### Advanced SIMD three same
 
 FADD_v          0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h
@@ -796,6 +805,9 @@ FRECPS_v        0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
 FRSQRTS_v       0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
 FRSQRTS_v       0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
 
+FADDP_v         0.10 1110 010 ..... 00010 1 ..... ..... @qrrr_h
+FADDP_v         0.10 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd
+
 ### Advanced SIMD scalar x indexed element
 
 FMUL_si         0101 1111 00 .. .... 1001 . 0 ..... .....   @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index d5828ba8df..f5ee6145b1 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5211,6 +5211,13 @@ static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
 };
 TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
 
+static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
+    gen_helper_gvec_faddp_h,
+    gen_helper_gvec_faddp_s,
+    gen_helper_gvec_faddp_d,
+};
+TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp)
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -5396,6 +5403,56 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
 
+/*
+ * Advanced SIMD scalar pairwise
+ */
+
+static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
+{
+switch (a->esz) {
+case MO_64:
+if (fp_access_check(s)) {
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+read_vec_element(s, t0, a->rn, 0, MO_64);
+read_vec_element(s, t1, a->rn, 1, MO_64);
+f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+write_fp_dreg(s, a->rd, t0);
+}
+break;
+case MO_32:
+if (fp_access_check(s)) {
+TCGv_i32 t0 = tcg_temp_new_i32();
+TCGv_i32 t1 = tcg_temp_new_i32();
+
+read_vec_element_i32(s, t0, a->rn, 0, MO_32);
+read_vec_element_i32(s, t1, a->rn, 1, MO_32);
+f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
+write_fp_sreg(s, a->rd, t0);
+}
+break;
+case MO_16:
+if (!dc_isar_feature(aa64_fp16, s)) {
+return false;
+}
+if (fp_access_check(s)) {
+TCGv_i32 t0 = tcg_temp_new_i32();
+TCGv_i32 t1 = tcg_temp_new_i32();
+
+read_vec_element_i32(s, t0, a->rn, 0, MO_16);
+read_vec_element_i32(s, t1, a->rn, 1, MO_16);
+f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
+write_fp_sreg(s, a->rd, t0);
+}
+break;
+default:
+