On 2020/8/8 4:24, Chih-Min Chao wrote:
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <richard.hender...@linaro.org> wrote:

    If a 32-bit input is not properly nanboxed, then the input is replaced
    with the default qnan.  The only inline expansion is for the sign-changing
    set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

    Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
    ---
     target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
     target/riscv/translate.c                | 18 +++++++
     2 files changed, 73 insertions(+), 16 deletions(-)
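
The rule being implemented, as a standalone C sketch (hypothetical helper,
not part of the patch): a 32-bit operand read from a 64-bit FP register is
valid only when bits 32..63 are all ones; anything else is consumed as the
default qNaN.

    #include <stdint.h>

    /* Spec-level view of the single-precision nanbox check. */
    static uint32_t effective_input_s(uint64_t reg)
    {
        return ((reg >> 32) == 0xffffffffu) ? (uint32_t)reg   /* low bits valid */
                                            : 0x7fc00000u;    /* default qNaN */
    }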

    diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
    index 264d3139f1..f9a9e0643a 100644
    --- a/target/riscv/insn_trans/trans_rvf.inc.c
    +++ b/target/riscv/insn_trans/trans_rvf.inc.c
    @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
     {
         REQUIRE_FPU;
         REQUIRE_EXT(ctx, RVF);
    +
         if (a->rs1 == a->rs2) { /* FMOV */
    -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
    +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
         } else { /* FSGNJ */
    -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
    -                            0, 31);
    +        TCGv_i64 rs1 = tcg_temp_new_i64();
    +        TCGv_i64 rs2 = tcg_temp_new_i64();
    +
    +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
    +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
    +
    +        /* This formulation retains the nanboxing of rs2. */
    +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
    +        tcg_temp_free_i64(rs1);
    +        tcg_temp_free_i64(rs2);
         }
    -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
         mark_fs_dirty(ctx);
         return true;
     }
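
For readers of the deposit above, a plain-C equivalent (hypothetical name,
not part of the patch): bits 0..30 come from rs1, while bit 31 and the
nanbox in bits 32..63 come from the already-checked rs2, which is why rs2's
nanboxing survives.

    /* Equivalent of tcg_gen_deposit_i64(rd, rs2, rs1, 0, 31). */
    static uint64_t fsgnj_s_bits(uint64_t rs1, uint64_t rs2)
    {
        uint64_t low31 = (1ull << 31) - 1;      /* bits 0..30 */
        return (rs2 & ~low31) | (rs1 & low31);
    }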

     static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
     {
    +    TCGv_i64 rs1, rs2, mask;
    +
         REQUIRE_FPU;
         REQUIRE_EXT(ctx, RVF);
    +
    +    rs1 = tcg_temp_new_i64();
    +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
    +
         if (a->rs1 == a->rs2) { /* FNEG */
    -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
    +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
         } else {
    -        TCGv_i64 t0 = tcg_temp_new_i64();
    -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
    -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
    -        tcg_temp_free_i64(t0);
    +        rs2 = tcg_temp_new_i64();
    +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
    +
    +        /*
    +         * Replace bit 31 in rs1 with inverse in rs2.
    +         * This formulation retains the nanboxing of rs1.
    +         */
    +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
    +        tcg_gen_andc_i64(rs2, mask, rs2);


should be

             tcg_gen_not_i64(rs2, rs2);         /* the inversion of rs2 is missing */
             tcg_gen_andc_i64(rs2, rs2, mask);  /* andc inverts mask, keeping only the sign bit */

 Chih-Min Chao

Hi Chih-Min,

Thanks for pointing it out. It is indeed a bug. However, I think it should be

    tcg_gen_andc_i64(rs2, rs2, mask);                     /* keep only rs2 bit 31 */
    tcg_gen_xori_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));   /* invert that bit */

Best Regards,
Zhiwei
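
To make the algebra concrete, here is the intended rs2 contribution as
standalone C (hypothetical name, not part of the patch): only the inverted
sign bit may survive, so the final or with the masked rs1 flips nothing else.

    /* Intended FSGNJN contribution of rs2: just the inverted sign bit. */
    static uint64_t fsgnjn_rs2_bits(uint64_t rs2)
    {
        uint64_t mask = ~(1ull << 31);
        return ~rs2 & ~mask;    /* == (~rs2) & (1ull << 31) */
    }

The patch's andc(mask, rs2) computes mask & ~rs2 instead, which always
clears bit 31 and keeps the unwanted complement of every other bit.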

    +        tcg_gen_and_i64(rs1, mask, rs1);
    +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
    +
    +        tcg_temp_free_i64(mask);
    +        tcg_temp_free_i64(rs2);
         }
    -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
    +    tcg_temp_free_i64(rs1);
    +
         mark_fs_dirty(ctx);
         return true;
     }

     static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
     {
    +    TCGv_i64 rs1, rs2;
    +
         REQUIRE_FPU;
         REQUIRE_EXT(ctx, RVF);
    +
    +    rs1 = tcg_temp_new_i64();
    +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
    +
         if (a->rs1 == a->rs2) { /* FABS */
    -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
    +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
         } else {
    -        TCGv_i64 t0 = tcg_temp_new_i64();
    -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
    -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
    -        tcg_temp_free_i64(t0);
    +        rs2 = tcg_temp_new_i64();
    +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
    +
    +        /*
    +         * Xor bit 31 in rs1 with that in rs2.
    +         * This formulation retains the nanboxing of rs1.
    +         */
    +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
    +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
    +
    +        tcg_temp_free_i64(rs2);
         }
    -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
    +    tcg_temp_free_i64(rs1);
    +
         mark_fs_dirty(ctx);
         return true;
     }
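
The FSGNJX case in plain C (hypothetical name, not part of the patch):
isolating rs2's bit 31 before the xor means only the sign bit of the
nanboxed rs1 can flip, so bits 32..63 stay all ones.

    /* Equivalent of the FSGNJX sequence: rd = rs1 ^ (rs2 & bit 31). */
    static uint64_t fsgnjx_s_bits(uint64_t rs1, uint64_t rs2)
    {
        return rs1 ^ (rs2 & (1ull << 31));
    }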
    diff --git a/target/riscv/translate.c b/target/riscv/translate.c
    index 12a746da97..bf35182776 100644
    --- a/target/riscv/translate.c
    +++ b/target/riscv/translate.c
    @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
         tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
     }

    +/*
    + * A narrow n-bit operation, where n < FLEN, checks that input operands
    + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
    + * If so, the least-significant bits of the input are used, otherwise the
    + * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
    + *
    + * Here, the result is always nan-boxed, even the canonical nan.
    + */
    +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
    +{
    +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
    +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
    +
    +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
    +    tcg_temp_free_i64(t_max);
    +    tcg_temp_free_i64(t_nan);
    +}
    +
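
One observation on the movcond trick, as host C (hypothetical name, not
part of the patch): for unsigned comparison, in >= 0xffffffff00000000
holds exactly when bits 32..63 are all ones, so the whole nanbox test is
a single compare.

    /* What gen_check_nanbox_s computes. */
    static uint64_t check_nanbox_s_bits(uint64_t in)
    {
        return in >= 0xffffffff00000000ull ? in                      /* boxed */
                                           : 0xffffffff7fc00000ull;  /* qNaN */
    }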
     static void generate_exception(DisasContext *ctx, int excp)
     {
         tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
--
2.25.1


