Re: [PATCH 20/57] target/arm: Convert FRECPS, FRSQRTS to decodetree

2024-05-23 Thread Peter Maydell
On Mon, 6 May 2024 at 02:07, Richard Henderson wrote:
>
> These are the last instructions within handle_3same_float
> and disas_simd_scalar_three_reg_same_fp16 so remove them.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/arm/tcg/a64.decode  |  12 ++
>  target/arm/tcg/translate-a64.c | 293 -
>  2 files changed, 46 insertions(+), 259 deletions(-)
>


Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH 20/57] target/arm: Convert FRECPS, FRSQRTS to decodetree

2024-05-05 Thread Richard Henderson
These are the last instructions within handle_3same_float
and disas_simd_scalar_three_reg_same_fp16 so remove them.

Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  12 ++
 target/arm/tcg/translate-a64.c | 293 -
 2 files changed, 46 insertions(+), 259 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 6aa6643d19..8d0a6a147e 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -731,6 +731,12 @@ FACGT_s         0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd
 FABD_s          0111 1110 110 ..... 00010 1 ..... ..... @rrr_h
 FABD_s          0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd
 
+FRECPS_s        0101 1110 010 ..... 00111 1 ..... ..... @rrr_h
+FRECPS_s        0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd
+
+FRSQRTS_s       0101 1110 110 ..... 00111 1 ..... ..... @rrr_h
+FRSQRTS_s       0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd
+
 ### Advanced SIMD three same
 
 FADD_v  0.00 1110 010 . 00010 1 . . @qrrr_h
@@ -784,6 +790,12 @@ FACGT_v         0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd
 FABD_v          0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h
 FABD_v          0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd
 
+FRECPS_v        0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h
+FRECPS_v        0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd
+
+FRSQRTS_v       0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h
+FRSQRTS_v       0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd
+
 ### Advanced SIMD scalar x indexed element
 
 FMUL_si         0101 1111 00 .. .... 1001 . 0 ..... .....   @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 5f5f62c907..d5828ba8df 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5036,6 +5036,20 @@ static const FPScalar f_scalar_fabd = {
 };
 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
 
+static const FPScalar f_scalar_frecps = {
+    gen_helper_recpsf_f16,
+    gen_helper_recpsf_f32,
+    gen_helper_recpsf_f64,
+};
+TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
+
+static const FPScalar f_scalar_frsqrts = {
+    gen_helper_rsqrtsf_f16,
+    gen_helper_rsqrtsf_f32,
+    gen_helper_rsqrtsf_f64,
+};
+TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
+
 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
                           gen_helper_gvec_3_ptr * const fns[3])
 {
@@ -5183,6 +5197,20 @@ static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
 };
 TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd)
 
+static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
+    gen_helper_gvec_recps_h,
+    gen_helper_gvec_recps_s,
+    gen_helper_gvec_recps_d,
+};
+TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps)
+
+static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
+    gen_helper_gvec_rsqrts_h,
+    gen_helper_gvec_rsqrts_s,
+    gen_helper_gvec_rsqrts_d,
+};
+TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts)
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -9302,107 +9330,6 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
 }
 }
 
-/* Handle the 3-same-operands float operations; shared by the scalar
- * and vector encodings. The caller must filter out any encodings
- * not allocated for the encoding it is dealing with.
- */
-static void handle_3same_float(DisasContext *s, int size, int elements,
-   int fpopcode, int rd, int rn, int rm)
-{
-int pass;
-TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
-
-for (pass = 0; pass < elements; pass++) {
-if (size) {
-/* Double */
-TCGv_i64 tcg_op1 = tcg_temp_new_i64();
-TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-TCGv_i64 tcg_res = tcg_temp_new_i64();
-
-read_vec_element(s, tcg_op1, rn, pass, MO_64);
-read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
-switch (fpopcode) {
-case 0x1f: /* FRECPS */
-gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
-break;
-case 0x3f: /* FRSQRTS */
-gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
-break;
-default:
-case 0x18: /* FMAXNM */
-case 0x19: /* FMLA */
-case 0x1a: /* FADD */
-case 0x1b: /* FMULX */
-case 0x1c: /* FCMEQ */
-case 0x1e: /* FMAX */
-case 0x38: /* FMINNM */
-case 0x39: /* FMLS */
-case 0x3a: /* FSUB */
-case 0x3e: /* FMIN */
-case 0x5b: /* FMUL */
-case 0x5c: /* FCMGE */
-case 0x5d: /* FACGE */
-case 0x5f: /* FDIV */
-case 0x7a: /* FABD */
-case 0x7c: /* FCMGT */
-case 0x7d: /* FACGT */
-g_assert_not_reached();
-}
-
-write_vec_element(s,