On Wed, 2 Jul 2025 at 13:38, Richard Henderson <richard.hender...@linaro.org> wrote: > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/arm/tcg/helper-sve.h | 8 + > target/arm/tcg/sve_helper.c | 317 +++++++++++++++++++++++++++++++++ > target/arm/tcg/translate-sve.c | 93 ++++++++++ > target/arm/tcg/sve.decode | 17 ++ > 4 files changed, 435 insertions(+)
Given how much code we end up with here for insns which are implemented in the pseudocode as a two-line loop, I think some more comments on what's going on would be helpful. I assume we're just specializing the arbitrary-length pack and unpack operations for element size and also vector length, but especially the bitwise operation magic could use some description of what it's doing. > diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h > index 733828a880..04b9545c11 100644 > --- a/target/arm/tcg/helper-sve.h > +++ b/target/arm/tcg/helper-sve.h > @@ -3020,3 +3020,11 @@ DEF_HELPER_FLAGS_4(sve2p1_andqv_b, TCG_CALL_NO_RWG, > void, ptr, ptr, ptr, i32) > DEF_HELPER_FLAGS_4(sve2p1_andqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) > DEF_HELPER_FLAGS_4(sve2p1_andqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) > DEF_HELPER_FLAGS_4(sve2p1_andqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) > + > +DEF_HELPER_FLAGS_3(pmov_pv_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) > +DEF_HELPER_FLAGS_3(pmov_pv_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) > +DEF_HELPER_FLAGS_3(pmov_pv_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) > + > +DEF_HELPER_FLAGS_3(pmov_vp_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32) > +DEF_HELPER_FLAGS_3(pmov_vp_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32) > +DEF_HELPER_FLAGS_3(pmov_vp_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32) > diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c > index c41143468a..7af5c59fa5 100644 > --- a/target/arm/tcg/sve_helper.c > +++ b/target/arm/tcg/sve_helper.c > @@ -3035,6 +3035,323 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t > desc) > } > } > > +static uint64_t extractn(uint64_t *p, unsigned pos, unsigned len) > +{ > + uint64_t x; > + > + p += pos / 64; > + pos = pos % 64; > + > + x = p[0]; > + if (pos + len > 64) { > + x = (x >> pos) | (p[1] << (-pos & 63)); > + pos = 0; > + } > + return extract64(x, pos, len); > +} This gets moved to a different file in a later patch, so we should put it in that place from the start. > + > +static void depositn(uint64_t *p, unsigned pos, unsigned len, uint64_t val) This could use a brief doc comment, like the one extractn() gets in the later patch. > +{ > + p += pos / 64; > + pos = pos % 64; > + > + if (pos + len <= 64) { > + p[0] = deposit64(p[0], pos, len, val); > + } else { > + unsigned len0 = 64 - pos; > + unsigned len1 = len - len0; > + > + p[0] = deposit64(p[0], pos, len0, val); > + p[1] = deposit64(p[1], 0, len1, val >> len0); > + } > +} thanks -- PMM