From: Frank Chang <frank.ch...@sifive.com> Signed-off-by: Frank Chang <frank.ch...@sifive.com> --- target/riscv/vector_helper.c | 118 ++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 50 deletions(-)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 39b9a462ab..2a006f956c 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -129,14 +129,32 @@ static inline uint32_t vext_vma(uint32_t desc) } /* - * Get vector group length in bytes. Its range is [64, 2048]. - * - * As simd_desc support at most 256, the max vlen is 512 bits. - * So vlen in bytes is encoded as maxsz. + * Get the maximum number of elements can be operated. */ -static inline uint32_t vext_maxsz(uint32_t desc) +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz, bool is_ldst) { - return simd_maxsz(desc) << vext_lmul(desc); + /* + * As simd_desc support at most 256, the max vlen is 512 bits, + * so vlen in bytes (vlenb) is encoded as maxsz. + */ + uint32_t vlenb = simd_maxsz(desc); + + if (is_ldst) { + /* + * Vector load/store instructions have the EEW encoded + * directly in the instructions. The maximum vector size is + * calculated with EMUL rather than LMUL. + */ + uint32_t eew = esz << 3; + uint32_t sew = vext_sew(desc); + float flmul = vext_vflmul(desc); + float emul = (float)eew / sew * flmul; + uint32_t emul_r = emul < 1 ? 1 : emul; + return vlenb * emul_r / esz; + } else { + /* Return VLMAX */ + return vlenb * vext_vflmul(desc) / esz; + } } /* @@ -296,7 +314,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t max_elems = vext_max_elems(desc, esz, true); uint32_t vta = vext_vta(desc); /* probe every access*/ @@ -314,15 +332,15 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base, } while (k < nf) { target_ulong addr = base + stride * i + k * esz; - ldst_elem(env, addr, i + k * vlmax, vd, ra); + ldst_elem(env, addr, i + k * max_elems, vd, ra); k++; } } /* clear tail elements */ if (clear_elem) { for (k = 0; k < nf; k++) { - clear_elem(vd, vta, env->vl + k * vlmax, - env->vl * esz, vlmax * esz); + clear_elem(vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); } } } @@ -371,7 +389,7 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t max_elems = vext_max_elems(desc, esz, true); uint32_t vta = vext_vta(desc); /* probe every access */ @@ -381,15 +399,15 @@ vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, k = 0; while (k < nf) { target_ulong addr = base + (i * nf + k) * esz; - ldst_elem(env, addr, i + k * vlmax, vd, ra); + ldst_elem(env, addr, i + k * max_elems, vd, ra); k++; } } /* clear tail elements */ if (clear_elem) { for (k = 0; k < nf; k++) { - clear_elem(vd, vta, env->vl + k * vlmax, - env->vl * esz, vlmax * esz); + clear_elem(vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); } } } @@ -472,7 +490,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, uint32_t i, k; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t max_elems = vext_max_elems(desc, esz, true); uint32_t vta = vext_vta(desc); /* probe every access*/ @@ -491,15 +509,15 @@ vext_ldst_index(void *vd, void *v0, target_ulong base, } while (k < nf) { abi_ptr addr = get_index_addr(base, i, vs2) + k * esz; - ldst_elem(env, addr, i + k * vlmax, vd, ra); + ldst_elem(env, addr, i + k * max_elems, vd, ra); k++; } } /* clear tail elements */ if (clear_elem) { for (k = 0; k < nf; k++) { - clear_elem(vd, vta, env->vl + k * vlmax, - env->vl * esz, vlmax * esz); + clear_elem(vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); } } } @@ -570,7 +588,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, uint32_t i, k, vl = 0; uint32_t nf = vext_nf(desc); uint32_t vm = vext_vm(desc); - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t max_elems = vext_max_elems(desc, esz, true); uint32_t vta = vext_vta(desc); target_ulong addr, offset, remain; @@ -622,7 +640,7 @@ ProbeSuccess: } while (k < nf) { target_ulong addr = base + (i * nf + k) * esz; - ldst_elem(env, addr, i + k * vlmax, vd, ra); + ldst_elem(env, addr, i + k * max_elems, vd, ra); k++; } } @@ -631,8 +649,8 @@ ProbeSuccess: return; } for (k = 0; k < nf; k++) { - clear_elem(vd, vta, env->vl + k * vlmax, - env->vl * esz, vlmax * esz); + clear_elem(vd, vta, env->vl + k * max_elems, + env->vl * esz, max_elems * esz); } } @@ -659,7 +677,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, { uint32_t i, k; uint32_t nf = vext_nf(desc); - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t max_elems = vext_max_elems(desc, esz, true); /* probe every access */ probe_pages(env, base, env->vlenb * nf * esz, ra, access_type); @@ -669,7 +687,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, k = 0; while (k < nf) { target_ulong addr = base + (i * nf + k) * esz; - ldst_elem(env, addr, i + k * vlmax, vd, ra); + ldst_elem(env, addr, i + k * max_elems, vd, ra); k++; } } @@ -812,7 +830,7 @@ vext_amo_noatomic(void *vs3, void *v0, target_ulong base, target_long addr; uint32_t wd = vext_wd(desc); uint32_t vm = vext_vm(desc); - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t vlmax = vext_max_elems(desc, esz, false); uint32_t vta = vext_vta(desc); for (i = 0; i < env->vl; i++) { @@ -983,7 +1001,7 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, uint32_t esz, uint32_t dsz, opivv2_fn *fn, clear_fn *clearfn) { - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t vlmax = vext_max_elems(desc, esz, false); uint32_t vm = vext_vm(desc); uint32_t vta = vext_vta(desc); uint32_t vl = env->vl; @@ -995,7 +1013,7 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, } fn(vd, vs1, vs2, i); } - clearfn(vd, vta, vl, vl * dsz, vlmax * dsz); + clearfn(vd, vta, vl, vl * dsz, vlmax * dsz); } /* generate the helpers for OPIVV */ @@ -1048,7 +1066,7 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, uint32_t esz, uint32_t dsz, opivx2_fn fn, clear_fn *clearfn) { - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t vlmax = vext_max_elems(desc, esz, false); uint32_t vm = vext_vm(desc); uint32_t vta = vext_vta(desc); uint32_t vl = env->vl; @@ -1060,7 +1078,7 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, } fn(vd, s1, vs2, i); } - clearfn(vd, vta, vl, vl * dsz, vlmax * dsz); + clearfn(vd, vta, vl, vl * dsz, vlmax * dsz); } /* generate the helpers for OPIVX */ @@ -1247,7 +1265,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -1277,7 +1295,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -1339,7 +1357,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ void *vs2, CPURISCVState *env, uint32_t desc) \ { \ uint32_t vl = env->vl; \ - uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false);\ uint32_t i; \ \ for (i = 0; i < vl; i++) { \ @@ -1427,7 +1445,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(TS1); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -1465,7 +1483,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(TD); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -2108,7 +2126,7 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ { \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -2130,7 +2148,7 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ { \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -2151,7 +2169,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ { \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -2173,7 +2191,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ { \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -2234,7 +2252,7 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, uint32_t desc, uint32_t esz, uint32_t dsz, opivv2_rm_fn *fn, clear_fn *clearfn) { - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t vlmax = vext_max_elems(desc, esz, false); uint32_t vm = vext_vm(desc); uint32_t vta = vext_vta(desc); uint32_t vl = env->vl; @@ -2354,7 +2372,7 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, uint32_t desc, uint32_t esz, uint32_t dsz, opivx2_rm_fn *fn, clear_fn *clearfn) { - uint32_t vlmax = vext_maxsz(desc) / esz; + uint32_t vlmax = vext_max_elems(desc, esz, false); uint32_t vm = vext_vm(desc); uint32_t vta = vext_vta(desc); uint32_t vl = env->vl; @@ -3258,7 +3276,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t vlmax = vext_max_elems(desc, ESZ, false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -3293,7 +3311,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ void *vs2, CPURISCVState *env, \ uint32_t desc) \ { \ - uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t vlmax = vext_max_elems(desc, ESZ, false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -3864,7 +3882,7 @@ static void do_##NAME(void *vd, void *vs2, int i, \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t vlmax = vext_max_elems(desc, ESZ, false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -4041,7 +4059,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ { \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ - uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \ uint32_t i; \ \ for (i = 0; i < vl; i++) { \ @@ -4185,7 +4203,7 @@ static void do_##NAME(void *vd, void *vs2, int i) \ void HELPER(NAME)(void *vd, void *v0, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax = vext_maxsz(desc) / ESZ; \ + uint32_t vlmax = vext_max_elems(desc, ESZ, false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -4272,7 +4290,7 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ uint32_t vm = vext_vm(desc); \ uint32_t vl = env->vl; \ uint32_t esz = sizeof(ETYPE); \ - uint32_t vlmax = vext_maxsz(desc) / esz; \ + uint32_t vlmax = vext_max_elems(desc, esz, false); \ uint32_t vta = vext_vta(desc); \ uint32_t i; \ \ @@ -4772,7 +4790,7 @@ GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq) void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -4882,7 +4900,7 @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq) void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -4912,7 +4930,7 @@ GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq) void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \ uint32_t vm = vext_vm(desc); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ @@ -4942,7 +4960,7 @@ GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq) void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ CPURISCVState *env, uint32_t desc) \ { \ - uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ + uint32_t vlmax = vext_max_elems(desc, sizeof(ETYPE), false); \ uint32_t vta = vext_vta(desc); \ uint32_t vl = env->vl; \ uint32_t num = 0, i; \ -- 2.17.1