From: "Lucas Mateus Castro (alqotel)"
This patch moves VMLADDUHM to decodetree a creates a gvec implementation
using mul_vec and add_vec.
reptloopmaster patch
8 12500 0,01810500 0,00903100 (-50.1%)
25 40000,01739400 0,00747700 (-57.0%)
100 10000,01843600 0,00901400 (-51.1%)
500 200 0,02574600 0,01971000 (-23.4%)
250040 0,05921600 0,07121800 (+20.3%)
800012 0,15326700 0,21725200 (+41.7%)
The significant difference in performance when REPT is low and LOOP is
high I think is due to the fact that the new implementation has a higher
translation time, as when using a helper only 5 TCGop are used but with
the patch a total of 10 TCGop are needed (Power lacks a direct mul_vec
equivalent so this instruction is implemented with the help of 5 others,
vmuleu, vmulou, vmrgh, vmrgl and vpkum).
Signed-off-by: Lucas Mateus Castro (alqotel)
---
target/ppc/helper.h | 2 +-
target/ppc/insn32.decode| 2 ++
target/ppc/int_helper.c | 3 +-
target/ppc/translate.c | 1 -
target/ppc/translate/vmx-impl.c.inc | 48 ++---
5 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 57eee07256..9c562ab00e 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -264,7 +264,7 @@ DEF_HELPER_FLAGS_4(VMSUMUHM, TCG_CALL_NO_RWG, void, avr,
avr, avr, avr)
DEF_HELPER_5(VMSUMUHS, void, env, avr, avr, avr, avr)
DEF_HELPER_FLAGS_4(VMSUMSHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr)
-DEF_HELPER_FLAGS_4(vmladduhm, TCG_CALL_NO_RWG, void, avr, avr, avr, avr)
+DEF_HELPER_FLAGS_5(VMLADDUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32)
DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32)
DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env)
DEF_HELPER_3(lvebx, void, env, avr, tl)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index a5249ee32c..7445455a12 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -693,6 +693,8 @@ VMSUMUHS000100 . . . . 100111 @VA
VMSUMCUD000100 . . . . 010111 @VA
VMSUMUDM000100 . . . . 100011 @VA
+VMLADDUHM 000100 . . . . 100010 @VA
+
## Vector String Instructions
VSTRIBL 000100 . 0 . . 001101 @VX_tb_rc
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 696096100b..0d25000b2a 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -974,7 +974,8 @@ void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r,
ppc_avr_t *a,
}
}
-void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
+void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
+ uint32_t v)
{
int i;
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index e810842925..11f729c60c 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6921,7 +6921,6 @@ GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x0001,
PPC_ALTIVEC),
GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x0001, PPC_ALTIVEC),
GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff, PPC_ALTIVEC),
-GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x, PPC_ALTIVEC),
#if defined(TARGET_PPC64)
GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x, PPC_NONE,
PPC2_ISA300),
diff --git a/target/ppc/translate/vmx-impl.c.inc
b/target/ppc/translate/vmx-impl.c.inc
index e644ad3236..9f18c6d4f2 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -2523,24 +2523,6 @@ static void glue(gen_, name0##_##name1)(DisasContext
*ctx) \
GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
-static void gen_vmladduhm(DisasContext *ctx)
-{
-TCGv_ptr ra, rb, rc, rd;
-if (unlikely(!ctx->altivec_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_VPU);
-return;
-}
-ra = gen_avr_ptr(rA(ctx->opcode));
-rb = gen_avr_ptr(rB(ctx->opcode));
-rc = gen_avr_ptr(rC(ctx->opcode));
-rd = gen_avr_ptr(rD(ctx->opcode));
-gen_helper_vmladduhm(rd, ra, rb, rc);
-tcg_temp_free_ptr(ra);
-tcg_temp_free_ptr(rb);
-tcg_temp_free_ptr(rc);
-tcg_temp_free_ptr(rd);
-}
-
static bool do_va_helper(DisasContext *ctx, arg_VA *a,
void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
@@ -2569,6 +2551,36 @@ TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper,
gen_helper_VSUBECUQ)
TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
+static void gen_vmladduhm_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec
b,
+ TCGv_vec c)
+{
+tcg_gen_mul_vec(vece, t,