On 16 May 2018 at 16:52, Richard Henderson <richard.hender...@linaro.org> wrote:
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index 731cf327a1..613598d090 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c

I just noticed this: in the 32-bit translator, the argument to
get_fpstatus_ptr() means "is this Neon?" (i.e. "do we use the standard
FPSCR value?"), so shouldn't we be passing 'true' to get_fpstatus_ptr()
for the halfprec conversions in disas_neon_data_insn()?

I haven't tested this, but I imagine that otherwise you get the wrong
results if the input is a denormal and FPSCR.FZ is 0, or if the
output should be a NaN and FPSCR.DN is 0.

> @@ -7222,53 +7247,70 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
>                      }
>                      break;
>                  case NEON_2RM_VCVT_F16_F32:
> +                {
> +                    TCGv_ptr fpst;
> +                    TCGv_i32 ahp;
> +
>                      if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
>                          q || (rm & 1)) {
>                          return 1;
>                      }
>                      tmp = tcg_temp_new_i32();
>                      tmp2 = tcg_temp_new_i32();
> +                    fpst = get_fpstatus_ptr(false);
> +                    ahp = get_ahp_flag();
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
> -                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
> -                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
>                      tcg_gen_shli_i32(tmp2, tmp2, 16);
>                      tcg_gen_or_i32(tmp2, tmp2, tmp);
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
> -                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
>                      tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
>                      neon_store_reg(rd, 0, tmp2);
>                      tmp2 = tcg_temp_new_i32();
> -                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
> +                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
>                      tcg_gen_shli_i32(tmp2, tmp2, 16);
>                      tcg_gen_or_i32(tmp2, tmp2, tmp);
>                      neon_store_reg(rd, 1, tmp2);
>                      tcg_temp_free_i32(tmp);
> +                    tcg_temp_free_i32(ahp);
> +                    tcg_temp_free_ptr(fpst);
>                      break;
> +                }
>                  case NEON_2RM_VCVT_F32_F16:
> +                {
> +                    TCGv_ptr fpst;
> +                    TCGv_i32 ahp;
>                      if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
>                          q || (rd & 1)) {
>                          return 1;
>                      }
> +                    fpst = get_fpstatus_ptr(false);
> +                    ahp = get_ahp_flag();
>                      tmp3 = tcg_temp_new_i32();
>                      tmp = neon_load_reg(rm, 0);
>                      tmp2 = neon_load_reg(rm, 1);
>                      tcg_gen_ext16u_i32(tmp3, tmp);
> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
>                      tcg_gen_shri_i32(tmp3, tmp, 16);
> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
>                      tcg_temp_free_i32(tmp);
>                      tcg_gen_ext16u_i32(tmp3, tmp2);
> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
>                      tcg_gen_shri_i32(tmp3, tmp2, 16);
> -                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
> +                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
>                      tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
>                      tcg_temp_free_i32(tmp2);
>                      tcg_temp_free_i32(tmp3);
> +                    tcg_temp_free_i32(ahp);
> +                    tcg_temp_free_ptr(fpst);
>                      break;
> +                }
>                  case NEON_2RM_AESE: case NEON_2RM_AESMC:
>                      if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
>                          || ((rm | rd) & 1)) {
> --
> 2.17.0
>

thanks
-- PMM

Reply via email to