From: Aleksandar Markovic <amarko...@wavecomp.com>

Unroll loops in helpers for MSA logic instructions for better performance.

Signed-off-by: Aleksandar Markovic <amarko...@wavecomp.com>
---
 target/mips/msa_helper.c | 44 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index 851450c..f57c906 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -130,10 +130,6 @@ void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws, \
     } \
 }
 
-MSA_FN_VECTOR(and_v, pwd->d[i], pws->d[i] & pwt->d[i])
-MSA_FN_VECTOR(or_v, pwd->d[i], pws->d[i] | pwt->d[i])
-MSA_FN_VECTOR(nor_v, pwd->d[i], ~(pws->d[i] | pwt->d[i]))
-MSA_FN_VECTOR(xor_v, pwd->d[i], pws->d[i] ^ pwt->d[i])
 MSA_FN_VECTOR(bmnz_v, pwd->d[i],
         BIT_MOVE_IF_NOT_ZERO(pwd->d[i], pws->d[i], pwt->d[i], DF_DOUBLE))
 MSA_FN_VECTOR(bmz_v, pwd->d[i],
@@ -145,6 +141,46 @@ MSA_FN_VECTOR(bsel_v, pwd->d[i],
 #undef BIT_SELECT
 #undef MSA_FN_VECTOR
 
+void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = pws->d[0] & pwt->d[0];
+    pwd->d[1] = pws->d[1] & pwt->d[1];
+}
+
+void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = pws->d[0] | pwt->d[0];
+    pwd->d[1] = pws->d[1] | pwt->d[1];
+}
+
+void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
+    pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
+}
+
+void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    pwd->d[0] = pws->d[0] ^ pwt->d[0];
+    pwd->d[1] = pws->d[1] ^ pwt->d[1];
+}
+
 static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
 {
     return arg1 + arg2;
-- 
2.7.4