On 8/3/21 6:14 AM, Richard Henderson wrote: > Rather than use 4-16 separate operations, use 2 operations > plus some byte reordering as necessary. > > Cc: Philippe Mathieu-Daudé <f4...@amsat.org> > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > target/mips/tcg/msa_helper.c | 201 +++++++++++++---------------------- > 1 file changed, 71 insertions(+), 130 deletions(-) > > diff --git a/target/mips/tcg/msa_helper.c b/target/mips/tcg/msa_helper.c > index a8880ce81c..e40c1b7057 100644 > --- a/target/mips/tcg/msa_helper.c > +++ b/target/mips/tcg/msa_helper.c > @@ -8218,47 +8218,31 @@ void helper_msa_ffint_u_df(CPUMIPSState *env, > uint32_t df, uint32_t wd, > #define MEMOP_IDX(DF) > #endif > > +#ifdef TARGET_WORDS_BIGENDIAN > +static inline uint64_t bswap16x4(uint64_t x) > +{ > + uint64_t m = 0x00ff00ff00ff00ffull; > + return ((x & m) << 8) | ((x >> 8) & m); > +} > + > +static inline uint64_t bswap32x2(uint64_t x) > +{ > + return ror64(bswap64(x), 32); > +} > +#endif
I'm trying to remove TARGET_WORDS_BIGENDIAN uses, so this would become: static inline bool is_cpu_bigendian(CPUMIPSState *env) { return extract32(env->CP0_Config0, CP0C0_BE, 1); } static inline uint64_t bswap16x4(CPUMIPSState *env, uint64_t x) { if (is_cpu_bigendian(env)) { uint64_t m = 0x00ff00ff00ff00ffull; return ((x & m) << 8) | ((x >> 8) & m); } else { return x; } } static inline uint64_t bswap32x2(CPUMIPSState *env, uint64_t x) { if (is_cpu_bigendian(env)) { return ror64(bswap64(x), 32); } else { return x; } } And we can remove the other TARGET_WORDS_BIGENDIAN uses: > void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd, > @@ -8293,18 +8271,20 @@ void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd, > { > wr_t *pwd = &(env->active_fpu.fpr[wd].wr); > uintptr_t ra = GETPC(); > + uint64_t d0, d1; > > -#if !defined(HOST_WORDS_BIGENDIAN) > - pwd->w[0] = cpu_ldl_data_ra(env, addr + (0 << DF_WORD), ra); > - pwd->w[1] = cpu_ldl_data_ra(env, addr + (1 << DF_WORD), ra); > - pwd->w[2] = cpu_ldl_data_ra(env, addr + (2 << DF_WORD), ra); > - pwd->w[3] = cpu_ldl_data_ra(env, addr + (3 << DF_WORD), ra); > -#else > - pwd->w[0] = cpu_ldl_data_ra(env, addr + (1 << DF_WORD), ra); > - pwd->w[1] = cpu_ldl_data_ra(env, addr + (0 << DF_WORD), ra); > - pwd->w[2] = cpu_ldl_data_ra(env, addr + (3 << DF_WORD), ra); > - pwd->w[3] = cpu_ldl_data_ra(env, addr + (2 << DF_WORD), ra); > + /* > + * Load 8 bytes at a time. Use little-endian load, then for > + * big-endian target, we must then bswap the two words. > + */ > + d0 = cpu_ldq_le_data_ra(env, addr + 0, ra); > + d1 = cpu_ldq_le_data_ra(env, addr + 8, ra); > +#ifdef TARGET_WORDS_BIGENDIAN > + d0 = bswap32x2(d0); > + d1 = bswap32x2(d1); > #endif > + pwd->d[0] = d0; > + pwd->d[1] = d1; > } But can be done later, so: Reviewed-by: Philippe Mathieu-Daudé <f4...@amsat.org> (nice simplification BTW!).