While we don't require 16-byte atomicity here, using a single larger operation simplifies the code.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/arm/translate-a64.c | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index d7d4b68328..edf92a728f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -965,25 +965,20 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 
     tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 
-    if (size < 4) {
+    if (size < MO_128) {
         mop = finalize_memop(s, size);
         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
     } else {
-        bool be = s->be_data == MO_BE;
-        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
         TCGv_i64 tmphi = tcg_temp_new_i64();
+        TCGv_i128 t16 = tcg_temp_new_i128();
 
         tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
-
-        mop = s->be_data | MO_UQ;
-        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
-                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
-        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
-        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
-                            get_mem_index(s), mop);
-
-        tcg_temp_free_i64(tcg_hiaddr);
+        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
         tcg_temp_free_i64(tmphi);
+
+        mop = finalize_memop(s, size);
+        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
+        tcg_temp_free_i128(t16);
     }
 
     tcg_temp_free_i64(tmplo);
@@ -999,23 +994,18 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
     TCGv_i64 tmphi = NULL;
     MemOp mop;
 
-    if (size < 4) {
+    if (size < MO_128) {
         mop = finalize_memop(s, size);
         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
     } else {
-        bool be = s->be_data == MO_BE;
-        TCGv_i64 tcg_hiaddr;
+        TCGv_i128 t16 = tcg_temp_new_i128();
+
+        mop = finalize_memop(s, size);
+        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
 
         tmphi = tcg_temp_new_i64();
-        tcg_hiaddr = tcg_temp_new_i64();
-
-        mop = s->be_data | MO_UQ;
-        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
-                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
-        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
-        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
-                            get_mem_index(s), mop);
-        tcg_temp_free_i64(tcg_hiaddr);
+        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
+        tcg_temp_free_i128(t16);
     }
 
     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
-- 
2.34.1