Re: [PATCH v8 01/15] target/riscv: Refactor some of the generic vector functionality

2023-07-18 Thread Alistair Francis
On Wed, Jul 12, 2023 at 2:59 AM Max Chou  wrote:
>
> From: Kiran Ostrolenk 
>
> Take some functions/macros out of `vector_helper` and put them in a new
> module called `vector_internals`. This ensures they can be used by both
> vector and vector-crypto helpers (the latter implemented in subsequent
> commits).
>
> Signed-off-by: Kiran Ostrolenk 
> Reviewed-by: Weiwei Li 
> Signed-off-by: Max Chou 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/meson.build|   1 +
>  target/riscv/vector_helper.c| 201 +---
>  target/riscv/vector_internals.c |  81 +
>  target/riscv/vector_internals.h | 182 +
>  4 files changed, 265 insertions(+), 200 deletions(-)
>  create mode 100644 target/riscv/vector_internals.c
>  create mode 100644 target/riscv/vector_internals.h
>
> diff --git a/target/riscv/meson.build b/target/riscv/meson.build
> index 7f56c5f88d4..c3801ee5e04 100644
> --- a/target/riscv/meson.build
> +++ b/target/riscv/meson.build
> @@ -16,6 +16,7 @@ riscv_ss.add(files(
>'gdbstub.c',
>'op_helper.c',
>'vector_helper.c',
> +  'vector_internals.c',
>'bitmanip_helper.c',
>'translate.c',
>'m128_helper.c',
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 71bb9b4457b..6434fd2f7e8 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -26,6 +26,7 @@
>  #include "fpu/softfloat.h"
>  #include "tcg/tcg-gvec-desc.h"
>  #include "internals.h"
> +#include "vector_internals.h"
>  #include <math.h>
>
>  target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
> @@ -72,68 +73,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, 
> target_ulong s1,
>  return vl;
>  }
>
> -/*
> - * Note that vector data is stored in host-endian 64-bit chunks,
> - * so addressing units smaller than that needs a host-endian fixup.
> - */
> -#if HOST_BIG_ENDIAN
> -#define H1(x)   ((x) ^ 7)
> -#define H1_2(x) ((x) ^ 6)
> -#define H1_4(x) ((x) ^ 4)
> -#define H2(x)   ((x) ^ 3)
> -#define H4(x)   ((x) ^ 1)
> -#define H8(x)   ((x))
> -#else
> -#define H1(x)   (x)
> -#define H1_2(x) (x)
> -#define H1_4(x) (x)
> -#define H2(x)   (x)
> -#define H4(x)   (x)
> -#define H8(x)   (x)
> -#endif
> -
> -static inline uint32_t vext_nf(uint32_t desc)
> -{
> -return FIELD_EX32(simd_data(desc), VDATA, NF);
> -}
> -
> -static inline uint32_t vext_vm(uint32_t desc)
> -{
> -return FIELD_EX32(simd_data(desc), VDATA, VM);
> -}
> -
> -/*
> - * Encode LMUL to lmul as following:
> - * LMUL vlmul lmul
> - *  1   000   0
> - *  2   001   1
> - *  4   010   2
> - *  8   011   3
> - *  -   100   -
> - * 1/8  101  -3
> - * 1/4  110  -2
> - * 1/2  111  -1
> - */
> -static inline int32_t vext_lmul(uint32_t desc)
> -{
> -return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
> -}
> -
> -static inline uint32_t vext_vta(uint32_t desc)
> -{
> -return FIELD_EX32(simd_data(desc), VDATA, VTA);
> -}
> -
> -static inline uint32_t vext_vma(uint32_t desc)
> -{
> -return FIELD_EX32(simd_data(desc), VDATA, VMA);
> -}
> -
> -static inline uint32_t vext_vta_all_1s(uint32_t desc)
> -{
> -return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
> -}
> -
>  /*
>   * Get the maximum number of elements can be operated.
>   *
> @@ -152,21 +91,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, 
> uint32_t log2_esz)
>  return scale < 0 ? vlenb >> -scale : vlenb << scale;
>  }
>
> -/*
> - * Get number of total elements, including prestart, body and tail elements.
> - * Note that when LMUL < 1, the tail includes the elements past VLMAX that
> - * are held in the same vector register.
> - */
> -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t 
> desc,
> -uint32_t esz)
> -{
> -uint32_t vlenb = simd_maxsz(desc);
> -uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
> -int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
> -  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
> -return (vlenb << emul) / esz;
> -}
> -
>  static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
>  {
>  return (addr & ~env->cur_pmmask) | env->cur_pmbase;
> @@ -199,20 +123,6 @@ static void probe_pages(CPURISCVState *env, target_ulong 
> addr,
>  }
>  }
>
> -/* set agnostic elements to 1s */
> -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
> -  uint32_t tot)
> -{
> -if (is_agnostic == 0) {
> -/* policy undisturbed */
> -return;
> -}
> -if (tot - cnt == 0) {
> -return;
> -}
> -memset(base + cnt, -1, tot - cnt);
> -}
> -
>  static inline void vext_set_elem_mask(void *v0, int index,
>uint8_t value)
>  {
> @@ -222,18 +132,6 @@ static 

[PATCH v8 01/15] target/riscv: Refactor some of the generic vector functionality

2023-07-11 Thread Max Chou
From: Kiran Ostrolenk 

Take some functions/macros out of `vector_helper` and put them in a new
module called `vector_internals`. This ensures they can be used by both
vector and vector-crypto helpers (the latter implemented in subsequent
commits).

Signed-off-by: Kiran Ostrolenk 
Reviewed-by: Weiwei Li 
Signed-off-by: Max Chou 
---
 target/riscv/meson.build|   1 +
 target/riscv/vector_helper.c| 201 +---
 target/riscv/vector_internals.c |  81 +
 target/riscv/vector_internals.h | 182 +
 4 files changed, 265 insertions(+), 200 deletions(-)
 create mode 100644 target/riscv/vector_internals.c
 create mode 100644 target/riscv/vector_internals.h

diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 7f56c5f88d4..c3801ee5e04 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -16,6 +16,7 @@ riscv_ss.add(files(
   'gdbstub.c',
   'op_helper.c',
   'vector_helper.c',
+  'vector_internals.c',
   'bitmanip_helper.c',
   'translate.c',
   'm128_helper.c',
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 71bb9b4457b..6434fd2f7e8 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -26,6 +26,7 @@
 #include "fpu/softfloat.h"
 #include "tcg/tcg-gvec-desc.h"
 #include "internals.h"
+#include "vector_internals.h"
 #include <math.h>
 
 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
@@ -72,68 +73,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong 
s1,
 return vl;
 }
 
-/*
- * Note that vector data is stored in host-endian 64-bit chunks,
- * so addressing units smaller than that needs a host-endian fixup.
- */
-#if HOST_BIG_ENDIAN
-#define H1(x)   ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x)   ((x) ^ 3)
-#define H4(x)   ((x) ^ 1)
-#define H8(x)   ((x))
-#else
-#define H1(x)   (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x)   (x)
-#define H4(x)   (x)
-#define H8(x)   (x)
-#endif
-
-static inline uint32_t vext_nf(uint32_t desc)
-{
-return FIELD_EX32(simd_data(desc), VDATA, NF);
-}
-
-static inline uint32_t vext_vm(uint32_t desc)
-{
-return FIELD_EX32(simd_data(desc), VDATA, VM);
-}
-
-/*
- * Encode LMUL to lmul as following:
- * LMUL vlmul lmul
- *  1   000   0
- *  2   001   1
- *  4   010   2
- *  8   011   3
- *  -   100   -
- * 1/8  101  -3
- * 1/4  110  -2
- * 1/2  111  -1
- */
-static inline int32_t vext_lmul(uint32_t desc)
-{
-return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
-}
-
-static inline uint32_t vext_vta(uint32_t desc)
-{
-return FIELD_EX32(simd_data(desc), VDATA, VTA);
-}
-
-static inline uint32_t vext_vma(uint32_t desc)
-{
-return FIELD_EX32(simd_data(desc), VDATA, VMA);
-}
-
-static inline uint32_t vext_vta_all_1s(uint32_t desc)
-{
-return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
-}
-
 /*
  * Get the maximum number of elements can be operated.
  *
@@ -152,21 +91,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, 
uint32_t log2_esz)
 return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }
 
-/*
- * Get number of total elements, including prestart, body and tail elements.
- * Note that when LMUL < 1, the tail includes the elements past VLMAX that
- * are held in the same vector register.
- */
-static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
-uint32_t esz)
-{
-uint32_t vlenb = simd_maxsz(desc);
-uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
-int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
-  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
-return (vlenb << emul) / esz;
-}
-
 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
 {
 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
@@ -199,20 +123,6 @@ static void probe_pages(CPURISCVState *env, target_ulong 
addr,
 }
 }
 
-/* set agnostic elements to 1s */
-static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
-  uint32_t tot)
-{
-if (is_agnostic == 0) {
-/* policy undisturbed */
-return;
-}
-if (tot - cnt == 0) {
-return;
-}
-memset(base + cnt, -1, tot - cnt);
-}
-
 static inline void vext_set_elem_mask(void *v0, int index,
   uint8_t value)
 {
@@ -222,18 +132,6 @@ static inline void vext_set_elem_mask(void *v0, int index,
 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
 }
 
-/*
- * Earlier designs (pre-0.9) had a varying number of bits
- * per mask value (MLEN). In the 0.9 design, MLEN=1.
- * (Section 4.5)
- */
-static inline int vext_elem_mask(void *v0, int index)
-{
-int idx = index / 64;
-int pos = index  % 64;
-return (((uint64_t *)v0)[idx]