On Tue, Aug 19, 2025 at 09:23:38PM +0800, Chao Liu wrote: > Hi all, > > In this patch (v5), I've removed the redundant call to mark_vs_dirty(s) > within the gen_ldst_stride_main_loop() function. > > The reason for this change is that mark_vs_dirty(s) is already being called > at a higher level, making the call inside gen_ldst_stride_main_loop() > unnecessary.
Hey, nice patch. Do you have any performance numbers? I hit a problem with this being unable to deal with restarts. You left the existing heleprs in there that can deal with vstart != 0, so I guess you intended it to fall back, but it's not quite wired up right. I tried adding that in and it seems to work. Also made a little adjustment to your test case if you wouldn't mind changing that too. I have a tcg test for interrupted vector memory operations that caught this bug, I will submit it soon and cc you on it. Thanks, Nick [PATCH] target/riscv: Fix "Generate strided vector ld/st with tcg" If a strided vector memory access instruction has non-zero vstart, fall back to the helper functions rather than causing an illegal instruction trap. The vlse/vsse helpers were dead code before this. An implementation is permitted to cause an illegal instruction if vstart is not 0 and it is set to a value that can not be produced implicitly by the implementation, but memory accesses will generally always need to deal with page faults. This also adjusts the tcg test Makefile change to specify the cpu type on a per-test basis, because I have another test that needs different CPU options, and that gets broken if you change it this way. [ feel free to take changes or parts of the changelog and adjust / merge them into your patches ] Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- target/riscv/insn_trans/trans_rvv.c.inc | 37 ++++++++++++++++++++--- tests/tcg/riscv64/Makefile.softmmu-target | 3 +- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 5e200249ef..439ea0edcf 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1090,11 +1090,30 @@ static void gen_ldst_stride_tail_loop(DisasContext *s, TCGv dest, uint32_t nf, return; } +typedef void gen_helper_ldst_stride(TCGv_ptr, TCGv_ptr, TCGv, + TCGv, TCGv_env, TCGv_i32); + static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, - uint32_t data, DisasContext *s, bool is_load) + uint32_t data, gen_helper_ldst_stride *fn, + DisasContext *s, bool is_load) { if (!s->vstart_eq_zero) { - return false; + TCGv_ptr dest, mask; + TCGv base, stride; + TCGv_i32 desc; + + dest = tcg_temp_new_ptr(); + mask = tcg_temp_new_ptr(); + base = get_gpr(s, rs1, EXT_NONE); + stride = get_gpr(s, rs2, EXT_NONE); + desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, + s->cfg_ptr->vlenb, data)); + + tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd)); + tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); + mark_vs_dirty(s); + fn(dest, mask, base, stride, tcg_env, desc); + return true; } TCGv dest = tcg_temp_new(); @@ -1146,6 +1165,16 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) { uint32_t data = 0; + gen_helper_ldst_stride *fn; + static gen_helper_ldst_stride *const fns[4] = { + gen_helper_vlse8_v, gen_helper_vlse16_v, + gen_helper_vlse32_v, gen_helper_vlse64_v + }; + + fn = fns[eew]; + if (fn == NULL) { + return false; + } uint8_t emul = vext_get_emul(s, eew); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -1153,7 +1182,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, NF, a->nf); data = FIELD_DP32(data, VDATA, VTA, s->vta); data = FIELD_DP32(data, VDATA, VMA, s->vma); - return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, true); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, true); } static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) @@ -1177,7 +1206,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew) data = FIELD_DP32(data, VDATA, LMUL, emul); data = FIELD_DP32(data, VDATA, NF, a->nf); - return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, false); + return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, NULL, s, false); } static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) diff --git a/tests/tcg/riscv64/Makefile.softmmu-target b/tests/tcg/riscv64/Makefile.softmmu-target index f09f1a57c4..d9f067dbd4 100644 --- a/tests/tcg/riscv64/Makefile.softmmu-target +++ b/tests/tcg/riscv64/Makefile.softmmu-target @@ -14,7 +14,7 @@ CFLAGS += -march=rv64gcv -mabi=lp64d -g -Og %: %.o $(LINK_SCRIPT) $(LD) $(LDFLAGS) $< -o $@ -QEMU_OPTS += -M virt -cpu rv64,v=true -display none -semihosting -device loader,file= +QEMU_OPTS += -M virt -display none -semihosting -device loader,file= EXTRA_RUNS += run-issue1060 run-issue1060: issue1060 @@ -30,6 +30,7 @@ run-misa-ialign: misa-ialign $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) EXTRA_RUNS += run-vlsseg8e32 +run-vlsseg8e32: QEMU_OPTS := -cpu rv64,v=true $(QEMU_OPTS) run-vlsseg8e32: test-vlsseg8e32 $(call run-test, $<, $(QEMU) $(QEMU_OPTS)$<) -- 2.51.0