On Fri, Mar 15, 2024 at 3:57 AM Daniel Henrique Barboza
<dbarb...@ventanamicro.com> wrote:
>
> Commit 8ff8ac6329 added a conditional to guard the vext_ldst_whole()
> helper if vstart >= evl. But by skipping the helper we're also not
> setting vstart = 0 at the end of the insns, which is incorrect.
>
> We'll move the conditional to vext_ldst_whole(), following in line with
> the removal of all brconds vstart >= vl that the next patch will do. The
> idea is to make the helpers responsible for their own vstart management.
>
> Fix ldst_whole isns by:
>
> - remove the brcond that skips the helper if vstart is >= evl;
>
> - vext_ldst_whole() now does an early exit with the same check, where
>   evl = (vlenb * nf) >> log2_esz, but the early exit will also clear
>   vstart.
>
> The 'width' param is now unneeded in ldst_whole_trans() and is also
> removed. It was used for the evl calculation for the brcond and has no
> other use now.  The 'width' is reflected in vext_ldst_whole() via
> log2_esz, which is encoded by GEN_VEXT_LD_WHOLE() as
> "ctzl(sizeof(ETYPE))".
>
> Suggested-by: Max Chou <max.c...@sifive.com>
> Fixes: 8ff8ac6329 ("target/riscv: rvv: Add missing early exit condition for 
> whole register load/store")
> Signed-off-by: Daniel Henrique Barboza <dbarb...@ventanamicro.com>

Reviewed-by: Alistair Francis <alistair.fran...@wdc.com>

Alistair

> ---
>  target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++--------------
>  target/riscv/vector_helper.c            |  5 +++
>  2 files changed, 28 insertions(+), 29 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index 52c26a7834..1366445e1f 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1097,13 +1097,9 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, 
> ld_us_check)
>  typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);
>
>  static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
> -                             uint32_t width, gen_helper_ldst_whole *fn,
> +                             gen_helper_ldst_whole *fn,
>                               DisasContext *s)
>  {
> -    uint32_t evl = s->cfg_ptr->vlenb * nf / width;
> -    TCGLabel *over = gen_new_label();
> -    tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, evl, over);
> -
>      TCGv_ptr dest;
>      TCGv base;
>      TCGv_i32 desc;
> @@ -1120,8 +1116,6 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, 
> uint32_t nf,
>
>      fn(dest, base, tcg_env, desc);
>
> -    gen_set_label(over);
> -
>      return true;
>  }
>
> @@ -1129,42 +1123,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t 
> rs1, uint32_t nf,
>   * load and store whole register instructions ignore vtype and vl setting.
>   * Thus, we don't need to check vill bit. (Section 7.9)
>   */
> -#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, WIDTH)               \
> +#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF)                                \
>  static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                 \
>  {                                                                         \
>      if (require_rvv(s) &&                                                 \
>          QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                 \
> -        return ldst_whole_trans(a->rd, a->rs1, ARG_NF, WIDTH,             \
> +        return ldst_whole_trans(a->rd, a->rs1, ARG_NF,                    \
>                                  gen_helper_##NAME, s);                    \
>      }                                                                     \
>      return false;                                                         \
>  }
>
> -GEN_LDST_WHOLE_TRANS(vl1re8_v,  1, 1)
> -GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, 2)
> -GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, 4)
> -GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, 8)
> -GEN_LDST_WHOLE_TRANS(vl2re8_v,  2, 1)
> -GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, 2)
> -GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, 4)
> -GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, 8)
> -GEN_LDST_WHOLE_TRANS(vl4re8_v,  4, 1)
> -GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, 2)
> -GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, 4)
> -GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, 8)
> -GEN_LDST_WHOLE_TRANS(vl8re8_v,  8, 1)
> -GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, 2)
> -GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, 4)
> -GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, 8)
> +GEN_LDST_WHOLE_TRANS(vl1re8_v,  1)
> +GEN_LDST_WHOLE_TRANS(vl1re16_v, 1)
> +GEN_LDST_WHOLE_TRANS(vl1re32_v, 1)
> +GEN_LDST_WHOLE_TRANS(vl1re64_v, 1)
> +GEN_LDST_WHOLE_TRANS(vl2re8_v,  2)
> +GEN_LDST_WHOLE_TRANS(vl2re16_v, 2)
> +GEN_LDST_WHOLE_TRANS(vl2re32_v, 2)
> +GEN_LDST_WHOLE_TRANS(vl2re64_v, 2)
> +GEN_LDST_WHOLE_TRANS(vl4re8_v,  4)
> +GEN_LDST_WHOLE_TRANS(vl4re16_v, 4)
> +GEN_LDST_WHOLE_TRANS(vl4re32_v, 4)
> +GEN_LDST_WHOLE_TRANS(vl4re64_v, 4)
> +GEN_LDST_WHOLE_TRANS(vl8re8_v,  8)
> +GEN_LDST_WHOLE_TRANS(vl8re16_v, 8)
> +GEN_LDST_WHOLE_TRANS(vl8re32_v, 8)
> +GEN_LDST_WHOLE_TRANS(vl8re64_v, 8)
>
>  /*
>   * The vector whole register store instructions are encoded similar to
>   * unmasked unit-stride store of elements with EEW=8.
>   */
> -GEN_LDST_WHOLE_TRANS(vs1r_v, 1, 1)
> -GEN_LDST_WHOLE_TRANS(vs2r_v, 2, 1)
> -GEN_LDST_WHOLE_TRANS(vs4r_v, 4, 1)
> -GEN_LDST_WHOLE_TRANS(vs8r_v, 8, 1)
> +GEN_LDST_WHOLE_TRANS(vs1r_v, 1)
> +GEN_LDST_WHOLE_TRANS(vs2r_v, 2)
> +GEN_LDST_WHOLE_TRANS(vs4r_v, 4)
> +GEN_LDST_WHOLE_TRANS(vs8r_v, 8)
>
>  /*
>   *** Vector Integer Arithmetic Instructions
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index bcc553c0e2..1f4c276b21 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -572,6 +572,11 @@ vext_ldst_whole(void *vd, target_ulong base, 
> CPURISCVState *env, uint32_t desc,
>      uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
>      uint32_t max_elems = vlenb >> log2_esz;
>
> +    if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
> +        env->vstart = 0;
> +        return;
> +    }
> +
>      k = env->vstart / max_elems;
>      off = env->vstart % max_elems;
>
> --
> 2.44.0
>
>

Reply via email to