Re: [PATCH v8 01/15] target/riscv: Refactor some of the generic vector functionality
On Wed, Jul 12, 2023 at 2:59 AM Max Chou wrote: > > From: Kiran Ostrolenk > > Take some functions/macros out of `vector_helper` and put them in a new > module called `vector_internals`. This ensures they can be used by both > vector and vector-crypto helpers (latter implemented in subsequent > commits). > > Signed-off-by: Kiran Ostrolenk > Reviewed-by: Weiwei Li > Signed-off-by: Max Chou Acked-by: Alistair Francis Alistair > --- > target/riscv/meson.build| 1 + > target/riscv/vector_helper.c| 201 +--- > target/riscv/vector_internals.c | 81 + > target/riscv/vector_internals.h | 182 + > 4 files changed, 265 insertions(+), 200 deletions(-) > create mode 100644 target/riscv/vector_internals.c > create mode 100644 target/riscv/vector_internals.h > > diff --git a/target/riscv/meson.build b/target/riscv/meson.build > index 7f56c5f88d4..c3801ee5e04 100644 > --- a/target/riscv/meson.build > +++ b/target/riscv/meson.build > @@ -16,6 +16,7 @@ riscv_ss.add(files( >'gdbstub.c', >'op_helper.c', >'vector_helper.c', > + 'vector_internals.c', >'bitmanip_helper.c', >'translate.c', >'m128_helper.c', > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 71bb9b4457b..6434fd2f7e8 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -26,6 +26,7 @@ > #include "fpu/softfloat.h" > #include "tcg/tcg-gvec-desc.h" > #include "internals.h" > +#include "vector_internals.h" > #include <math.h> > > target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, > @@ -72,68 +73,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, > target_ulong s1, > return vl; > } > > -/* > - * Note that vector data is stored in host-endian 64-bit chunks, > - * so addressing units smaller than that needs a host-endian fixup. 
> - */ > -#if HOST_BIG_ENDIAN > -#define H1(x) ((x) ^ 7) > -#define H1_2(x) ((x) ^ 6) > -#define H1_4(x) ((x) ^ 4) > -#define H2(x) ((x) ^ 3) > -#define H4(x) ((x) ^ 1) > -#define H8(x) ((x)) > -#else > -#define H1(x) (x) > -#define H1_2(x) (x) > -#define H1_4(x) (x) > -#define H2(x) (x) > -#define H4(x) (x) > -#define H8(x) (x) > -#endif > - > -static inline uint32_t vext_nf(uint32_t desc) > -{ > -return FIELD_EX32(simd_data(desc), VDATA, NF); > -} > - > -static inline uint32_t vext_vm(uint32_t desc) > -{ > -return FIELD_EX32(simd_data(desc), VDATA, VM); > -} > - > -/* > - * Encode LMUL to lmul as following: > - * LMUL vlmul lmul > - * 1 000 0 > - * 2 001 1 > - * 4 010 2 > - * 8 011 3 > - * - 100 - > - * 1/8 101 -3 > - * 1/4 110 -2 > - * 1/2 111 -1 > - */ > -static inline int32_t vext_lmul(uint32_t desc) > -{ > -return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); > -} > - > -static inline uint32_t vext_vta(uint32_t desc) > -{ > -return FIELD_EX32(simd_data(desc), VDATA, VTA); > -} > - > -static inline uint32_t vext_vma(uint32_t desc) > -{ > -return FIELD_EX32(simd_data(desc), VDATA, VMA); > -} > - > -static inline uint32_t vext_vta_all_1s(uint32_t desc) > -{ > -return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); > -} > - > /* > * Get the maximum number of elements can be operated. > * > @@ -152,21 +91,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, > uint32_t log2_esz) > return scale < 0 ? vlenb >> -scale : vlenb << scale; > } > > -/* > - * Get number of total elements, including prestart, body and tail elements. > - * Note that when LMUL < 1, the tail includes the elements past VLMAX that > - * are held in the same vector register. > - */ > -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t > desc, > -uint32_t esz) > -{ > -uint32_t vlenb = simd_maxsz(desc); > -uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); > -int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 
0 : > - ctzl(esz) - ctzl(sew) + vext_lmul(desc); > -return (vlenb << emul) / esz; > -} > - > static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) > { > return (addr & ~env->cur_pmmask) | env->cur_pmbase; > @@ -199,20 +123,6 @@ static void probe_pages(CPURISCVState *env, target_ulong > addr, > } > } > > -/* set agnostic elements to 1s */ > -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, > - uint32_t tot) > -{ > -if (is_agnostic == 0) { > -/* policy undisturbed */ > -return; > -} > -if (tot - cnt == 0) { > -return; > -} > -memset(base + cnt, -1, tot - cnt); > -} > - > static inline void vext_set_elem_mask(void *v0, int index, >uint8_t value) > { > @@ -222,18 +132,6 @@ static
[PATCH v8 01/15] target/riscv: Refactor some of the generic vector functionality
From: Kiran Ostrolenk Take some functions/macros out of `vector_helper` and put them in a new module called `vector_internals`. This ensures they can be used by both vector and vector-crypto helpers (latter implemented in subsequent commits). Signed-off-by: Kiran Ostrolenk Reviewed-by: Weiwei Li Signed-off-by: Max Chou --- target/riscv/meson.build| 1 + target/riscv/vector_helper.c| 201 +--- target/riscv/vector_internals.c | 81 + target/riscv/vector_internals.h | 182 + 4 files changed, 265 insertions(+), 200 deletions(-) create mode 100644 target/riscv/vector_internals.c create mode 100644 target/riscv/vector_internals.h diff --git a/target/riscv/meson.build b/target/riscv/meson.build index 7f56c5f88d4..c3801ee5e04 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -16,6 +16,7 @@ riscv_ss.add(files( 'gdbstub.c', 'op_helper.c', 'vector_helper.c', + 'vector_internals.c', 'bitmanip_helper.c', 'translate.c', 'm128_helper.c', diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 71bb9b4457b..6434fd2f7e8 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -26,6 +26,7 @@ #include "fpu/softfloat.h" #include "tcg/tcg-gvec-desc.h" #include "internals.h" +#include "vector_internals.h" #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, @@ -72,68 +73,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, return vl; } -/* - * Note that vector data is stored in host-endian 64-bit chunks, - * so addressing units smaller than that needs a host-endian fixup. 
- */ -#if HOST_BIG_ENDIAN -#define H1(x) ((x) ^ 7) -#define H1_2(x) ((x) ^ 6) -#define H1_4(x) ((x) ^ 4) -#define H2(x) ((x) ^ 3) -#define H4(x) ((x) ^ 1) -#define H8(x) ((x)) -#else -#define H1(x) (x) -#define H1_2(x) (x) -#define H1_4(x) (x) -#define H2(x) (x) -#define H4(x) (x) -#define H8(x) (x) -#endif - -static inline uint32_t vext_nf(uint32_t desc) -{ -return FIELD_EX32(simd_data(desc), VDATA, NF); -} - -static inline uint32_t vext_vm(uint32_t desc) -{ -return FIELD_EX32(simd_data(desc), VDATA, VM); -} - -/* - * Encode LMUL to lmul as following: - * LMUL vlmul lmul - * 1 000 0 - * 2 001 1 - * 4 010 2 - * 8 011 3 - * - 100 - - * 1/8 101 -3 - * 1/4 110 -2 - * 1/2 111 -1 - */ -static inline int32_t vext_lmul(uint32_t desc) -{ -return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); -} - -static inline uint32_t vext_vta(uint32_t desc) -{ -return FIELD_EX32(simd_data(desc), VDATA, VTA); -} - -static inline uint32_t vext_vma(uint32_t desc) -{ -return FIELD_EX32(simd_data(desc), VDATA, VMA); -} - -static inline uint32_t vext_vta_all_1s(uint32_t desc) -{ -return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); -} - /* * Get the maximum number of elements can be operated. * @@ -152,21 +91,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) return scale < 0 ? vlenb >> -scale : vlenb << scale; } -/* - * Get number of total elements, including prestart, body and tail elements. - * Note that when LMUL < 1, the tail includes the elements past VLMAX that - * are held in the same vector register. - */ -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, -uint32_t esz) -{ -uint32_t vlenb = simd_maxsz(desc); -uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); -int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 
0 : - ctzl(esz) - ctzl(sew) + vext_lmul(desc); -return (vlenb << emul) / esz; -} - static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) { return (addr & ~env->cur_pmmask) | env->cur_pmbase; @@ -199,20 +123,6 @@ static void probe_pages(CPURISCVState *env, target_ulong addr, } } -/* set agnostic elements to 1s */ -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, - uint32_t tot) -{ -if (is_agnostic == 0) { -/* policy undisturbed */ -return; -} -if (tot - cnt == 0) { -return; -} -memset(base + cnt, -1, tot - cnt); -} - static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -222,18 +132,6 @@ static inline void vext_set_elem_mask(void *v0, int index, ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); } -/* - * Earlier designs (pre-0.9) had a varying number of bits - * per mask value (MLEN). In the 0.9 design, MLEN=1. - * (Section 4.5) - */ -static inline int vext_elem_mask(void *v0, int index) -{ -int idx = index / 64; -int pos = index % 64; -return (((uint64_t *)v0)[idx]