On Wed, 2 Jul 2025 at 13:38, Richard Henderson
<richard.hender...@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
>  target/arm/tcg/helper-sve.h    |   8 +
>  target/arm/tcg/sve_helper.c    | 317 +++++++++++++++++++++++++++++++++
>  target/arm/tcg/translate-sve.c |  93 ++++++++++
>  target/arm/tcg/sve.decode      |  17 ++
>  4 files changed, 435 insertions(+)

Given how much code we end up with here for insns which are
implemented in the pseudocode as a two-line loop, I think
some more comments on what's going on would be helpful.
I assume we're just specializing the arbitrary-length
pack and unpack operations for element size and also vector
length, but especially the bitwise operation magic could
use some description of what it's doing.

> diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
> index 733828a880..04b9545c11 100644
> --- a/target/arm/tcg/helper-sve.h
> +++ b/target/arm/tcg/helper-sve.h
> @@ -3020,3 +3020,11 @@ DEF_HELPER_FLAGS_4(sve2p1_andqv_b, TCG_CALL_NO_RWG, 
> void, ptr, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_4(sve2p1_andqv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_4(sve2p1_andqv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_4(sve2p1_andqv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
> +
> +DEF_HELPER_FLAGS_3(pmov_pv_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_3(pmov_pv_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_3(pmov_pv_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
> +
> +DEF_HELPER_FLAGS_3(pmov_vp_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_3(pmov_vp_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_3(pmov_vp_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
> diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
> index c41143468a..7af5c59fa5 100644
> --- a/target/arm/tcg/sve_helper.c
> +++ b/target/arm/tcg/sve_helper.c
> @@ -3035,6 +3035,323 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t 
> desc)
>      }
>  }
>
> +static uint64_t extractn(uint64_t *p, unsigned pos, unsigned len)
> +{
> +    uint64_t x;
> +
> +    p += pos / 64;
> +    pos = pos % 64;
> +
> +    x = p[0];
> +    if (pos + len > 64) {
> +        x = (x >> pos) | (p[1] << (-pos & 63));
> +        pos = 0;
> +    }
> +    return extract64(x, pos, len);
> +}

This gets moved to a different file in a later patch, so we should
put it in that place from the start.

> +
> +static void depositn(uint64_t *p, unsigned pos, unsigned len, uint64_t val)

This could use a brief doc comment, like the one extractn() gets in
the later patch.

> +{
> +    p += pos / 64;
> +    pos = pos % 64;
> +
> +    if (pos + len <= 64) {
> +        p[0] = deposit64(p[0], pos, len, val);
> +    } else {
> +        unsigned len0 = 64 - pos;
> +        unsigned len1 = len - len0;
> +
> +        p[0] = deposit64(p[0], pos, len0, val);
> +        p[1] = deposit64(p[1], 0, len1, val >> len0);
> +    }
> +}

thanks
-- PMM

Reply via email to