On Tue, Oct 28, 2025 at 4:28 AM Anton Johansson via <[email protected]> wrote: > > According to version 20250508 of the unprivileged specification: > - vtype: bits 0..7 used, bit XLEN-1 illegal, rest reserved > => fix to 64-bits. > > - vxsat: bit 0 used, vxrm which would occupy bits 1..2 is stored > separately, and bits 3..31 are set to 0 > => fix to 8-bits. > > - vxrm: 2 lowest bits are used for rounding mode, rest set to 0 > => fix to 8-bits. > > - vstart: maximum value of VLMAX-1, where VLMAX is at most 2^16 > => fix to 32-bits as vstart is mapped to a TCG global. > > - vl: maximum value of VLEN which is at most 2^16 > => fix to 32-bits as vl is mapped to a TCG global. > > Fields are shuffled for reduced padding. > > Note, the cpu/vector VMSTATE version is bumped, breaking migration from > older versions. > > Signed-off-by: Anton Johansson <[email protected]> > Reviewed-by: Pierrick Bouvier <[email protected]>
Acked-by: Alistair Francis <[email protected]> Alistair > --- > target/riscv/cpu.h | 12 ++++++------ > target/riscv/machine.c | 14 +++++++------- > target/riscv/translate.c | 12 ++++++++---- > target/riscv/vector_helper.c | 22 ++++++++++++++++++---- > target/riscv/insn_trans/trans_rvv.c.inc | 24 ++++++++++++------------ > 5 files changed, 51 insertions(+), 33 deletions(-) > > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h > index 6b4edbfe9e..bd200ccad4 100644 > --- a/target/riscv/cpu.h > +++ b/target/riscv/cpu.h > @@ -192,7 +192,7 @@ FIELD(VTYPE, VSEW, 3, 3) > FIELD(VTYPE, VTA, 6, 1) > FIELD(VTYPE, VMA, 7, 1) > FIELD(VTYPE, VEDIV, 8, 2) > -FIELD(VTYPE, RESERVED, 10, sizeof(target_ulong) * 8 - 11) > +FIELD(VTYPE, RESERVED, 10, sizeof(uint64_t) * 8 - 11) > > typedef struct PMUCTRState { > /* Current value of a counter */ > @@ -218,11 +218,11 @@ struct CPUArchState { > > /* vector coprocessor state. */ > uint64_t vreg[32 * RV_VLEN_MAX / 64] QEMU_ALIGNED(16); > - target_ulong vxrm; > - target_ulong vxsat; > - target_ulong vl; > - target_ulong vstart; > - target_ulong vtype; > + uint64_t vtype; > + uint32_t vl; > + uint32_t vstart; > + uint8_t vxrm; > + uint8_t vxsat; > bool vill; > > target_ulong pc; > diff --git a/target/riscv/machine.c b/target/riscv/machine.c > index 7349383eab..440b09fc32 100644 > --- a/target/riscv/machine.c > +++ b/target/riscv/machine.c > @@ -137,16 +137,16 @@ static bool vector_needed(void *opaque) > > static const VMStateDescription vmstate_vector = { > .name = "cpu/vector", > - .version_id = 2, > - .minimum_version_id = 2, > + .version_id = 3, > + .minimum_version_id = 3, > .needed = vector_needed, > .fields = (const VMStateField[]) { > VMSTATE_UINT64_ARRAY(env.vreg, RISCVCPU, 32 * RV_VLEN_MAX / 64), > - VMSTATE_UINTTL(env.vxrm, RISCVCPU), > - VMSTATE_UINTTL(env.vxsat, RISCVCPU), > - VMSTATE_UINTTL(env.vl, RISCVCPU), > - VMSTATE_UINTTL(env.vstart, RISCVCPU), > - VMSTATE_UINTTL(env.vtype, RISCVCPU), > + VMSTATE_UINT64(env.vtype, 
RISCVCPU), > + VMSTATE_UINT32(env.vl, RISCVCPU), > + VMSTATE_UINT32(env.vstart, RISCVCPU), > + VMSTATE_UINT8(env.vxrm, RISCVCPU), > + VMSTATE_UINT8(env.vxsat, RISCVCPU), > VMSTATE_BOOL(env.vill, RISCVCPU), > VMSTATE_END_OF_LIST() > } > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > index 8df1a2ed3c..15eee7f6ee 100644 > --- a/target/riscv/translate.c > +++ b/target/riscv/translate.c > @@ -39,8 +39,9 @@ > #include "tcg/tcg-cpu.h" > > /* global register indices */ > -static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart; > +static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc; > static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */ > +static TCGv_i32 cpu_vl, cpu_vstart; > static TCGv load_res; > static TCGv load_val; > > @@ -1455,6 +1456,10 @@ void riscv_translate_init(void) > size_t field_offset = 0; > #endif > > + /* 32 bits in size, no offset needed */ > + size_t vl_offset = offsetof(CPURISCVState, vl); > + size_t vstart_offset = offsetof(CPURISCVState, vstart); > + > for (i = 1; i < 32; i++) { > cpu_gpr[i] = tcg_global_mem_new(tcg_env, > offsetof(CPURISCVState, gpr[i]) + field_offset, > @@ -1470,9 +1475,8 @@ void riscv_translate_init(void) > } > > cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, pc), "pc"); > - cpu_vl = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vl), "vl"); > - cpu_vstart = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, vstart), > - "vstart"); > + cpu_vl = tcg_global_mem_new_i32(tcg_env, vl_offset, "vl"); > + cpu_vstart = tcg_global_mem_new_i32(tcg_env, vstart_offset, "vstart"); > load_res = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_res), > "load_res"); > load_val = tcg_global_mem_new(tcg_env, offsetof(CPURISCVState, load_val), > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 2de3358ee8..cf9a199566 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -360,6 +360,12 @@ vext_page_ldst_us(CPURISCVState *env, void 
*vd, > target_ulong addr, > uint32_t evl = env->vstart + elems; > MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; > > + /* > + * Maximum vector length is VLMAX == 2^16 == LMUL * VLEN / SEW, and > + * occurs for LMUL == 8, SEW == 8, VLEN == 2^16. > + */ > + g_assert(env->vstart < UINT16_MAX && UINT16_MAX - env->vstart >= elems); > + > /* Check page permission/pmp/watchpoint/etc. */ > probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags, > true); > @@ -2594,19 +2600,27 @@ static inline uint8_t get_round(int vxrm, uint64_t v, > uint8_t shift) > > d1 = extract64(v, shift - 1, 1); > D1 = extract64(v, 0, shift); > - if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ > + switch (vxrm) { > + case 0: > + /* round-to-nearest-up (add +0.5 LSB) */ > return d1; > - } else if (vxrm == 1) { /* round-to-nearest-even */ > + case 1: > + /* round-to-nearest-even */ > if (shift > 1) { > D2 = extract64(v, 0, shift - 1); > return d1 & ((D2 != 0) | d); > } else { > return d1 & d; > } > - } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ > + case 2: > + /* round-down (truncate) */ > + return 0; > + case 3: > + /* round-to-odd (OR bits into LSB, aka "jam") */ > return !d & (D1 != 0); > + default: > + g_assert_not_reached(); > } > - return 0; /* round-down (truncate) */ > } > > static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, > diff --git a/target/riscv/insn_trans/trans_rvv.c.inc > b/target/riscv/insn_trans/trans_rvv.c.inc > index 2a487179f6..32474a21dc 100644 > --- a/target/riscv/insn_trans/trans_rvv.c.inc > +++ b/target/riscv/insn_trans/trans_rvv.c.inc > @@ -194,7 +194,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, > TCGv s2) > > if (rd == 0 && rs1 == 0) { > s1 = tcg_temp_new(); > - tcg_gen_mov_tl(s1, cpu_vl); > + tcg_gen_ext_i32_tl(s1, cpu_vl); > } else if (rs1 == 0) { > /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */ > s1 = tcg_constant_tl(RV_VLEN_MAX); > @@ -1213,9 
+1213,9 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, > uint32_t nf, > MO_LE | MO_64 | atomicity); > } > if (i == size - 8) { > - tcg_gen_movi_tl(cpu_vstart, 0); > + tcg_gen_movi_i32(cpu_vstart, 0); > } else { > - tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz); > + tcg_gen_addi_i32(cpu_vstart, cpu_vstart, 8 >> log2_esz); > } > } > } else { > @@ -1231,9 +1231,9 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, > uint32_t nf, > MO_LE | MO_32 | atomicity); > } > if (i == size - 4) { > - tcg_gen_movi_tl(cpu_vstart, 0); > + tcg_gen_movi_i32(cpu_vstart, 0); > } else { > - tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz); > + tcg_gen_addi_i32(cpu_vstart, cpu_vstart, 4 >> log2_esz); > } > } > } > @@ -3459,7 +3459,7 @@ static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s > *a) > vec_element_loadi(s, t1, a->rs2, 0, true); > tcg_gen_trunc_i64_tl(dest, t1); > gen_set_gpr(s, a->rd, dest); > - tcg_gen_movi_tl(cpu_vstart, 0); > + tcg_gen_movi_i32(cpu_vstart, 0); > finalize_rvv_inst(s); > return true; > } > @@ -3476,7 +3476,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x > *a) > TCGv s1; > TCGLabel *over = gen_new_label(); > > - tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); > + tcg_gen_brcond_i32(TCG_COND_GEU, cpu_vstart, cpu_vl, over); > > t1 = tcg_temp_new_i64(); > > @@ -3488,7 +3488,7 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x > *a) > tcg_gen_ext_tl_i64(t1, s1); > vec_element_storei(s, a->rd, 0, t1); > gen_set_label(over); > - tcg_gen_movi_tl(cpu_vstart, 0); > + tcg_gen_movi_i32(cpu_vstart, 0); > finalize_rvv_inst(s); > return true; > } > @@ -3516,7 +3516,7 @@ static bool trans_vfmv_f_s(DisasContext *s, > arg_vfmv_f_s *a) > } > > mark_fs_dirty(s); > - tcg_gen_movi_tl(cpu_vstart, 0); > + tcg_gen_movi_i32(cpu_vstart, 0); > finalize_rvv_inst(s); > return true; > } > @@ -3536,7 +3536,7 @@ static bool trans_vfmv_s_f(DisasContext *s, > arg_vfmv_s_f *a) > TCGLabel *over = gen_new_label(); > > /* if vstart >= 
vl, skip vector register write back */ > - tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); > + tcg_gen_brcond_i32(TCG_COND_GEU, cpu_vstart, cpu_vl, over); > > /* NaN-box f[rs1] */ > t1 = tcg_temp_new_i64(); > @@ -3545,7 +3545,7 @@ static bool trans_vfmv_s_f(DisasContext *s, > arg_vfmv_s_f *a) > vec_element_storei(s, a->rd, 0, t1); > > gen_set_label(over); > - tcg_gen_movi_tl(cpu_vstart, 0); > + tcg_gen_movi_i32(cpu_vstart, 0); > finalize_rvv_inst(s); > return true; > } > @@ -3610,7 +3610,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) > \ > \ > fns[s->sew](dest, mask, src1, src2, tcg_env, desc); \ > \ > - tcg_gen_movi_tl(cpu_vstart, 0); \ > + tcg_gen_movi_i32(cpu_vstart, 0); \ > finalize_rvv_inst(s); \ > \ > return true; \ > -- > 2.51.0 > >
