Re: [Qemu-devel] [PATCH RESEND v2 14/17] target-ppc: improve lxvw4x implementation

2016-09-15 Thread Nikunj A Dadhania
David Gibson  writes:

> [ Unknown signature status ]
> On Mon, Sep 12, 2016 at 12:11:43PM +0530, Nikunj A Dadhania wrote:
>> Load 8byte at a time and manipulate.
>> 
>> Signed-off-by: Nikunj A Dadhania 
>> ---
>>  target-ppc/helper.h |  1 +
>>  target-ppc/mem_helper.c |  5 +
>>  target-ppc/translate/vsx-impl.inc.c | 34 --
>>  3 files changed, 26 insertions(+), 14 deletions(-)
>> 
>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index 966f2ce..1bbeac4 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -297,6 +297,7 @@ DEF_HELPER_2(mtvscr, void, env, avr)
>>  DEF_HELPER_3(lvebx, void, env, avr, tl)
>>  DEF_HELPER_3(lvehx, void, env, avr, tl)
>>  DEF_HELPER_3(lvewx, void, env, avr, tl)
>> +DEF_HELPER_1(bswap32x2, i64, i64)
>>  DEF_HELPER_3(stvebx, void, env, avr, tl)
>>  DEF_HELPER_3(stvehx, void, env, avr, tl)
>>  DEF_HELPER_3(stvewx, void, env, avr, tl)
>> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
>> index 6548715..a56051a 100644
>> --- a/target-ppc/mem_helper.c
>> +++ b/target-ppc/mem_helper.c
>> @@ -285,6 +285,11 @@ STVE(stvewx, cpu_stl_data_ra, bswap32, u32)
>>  #undef I
>>  #undef LVE
>>  
>> +uint64_t helper_bswap32x2(uint64_t x)
>> +{
>> +return deposit64((x >> 32), 32, 32, (x));
>> +}
>> +
>>  #undef HI_IDX
>>  #undef LO_IDX
>>  
>> diff --git a/target-ppc/translate/vsx-impl.inc.c 
>> b/target-ppc/translate/vsx-impl.inc.c
>> index eee6052..e3374df 100644
>> --- a/target-ppc/translate/vsx-impl.inc.c
>> +++ b/target-ppc/translate/vsx-impl.inc.c
>> @@ -75,7 +75,7 @@ static void gen_lxvdsx(DisasContext *ctx)
>>  static void gen_lxvw4x(DisasContext *ctx)
>>  {
>>  TCGv EA;
>> -TCGv_i64 tmp;
>> +TCGv_i64 t0, t1;
>>  TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
>>  TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
>>  if (unlikely(!ctx->vsx_enabled)) {
>> @@ -84,22 +84,28 @@ static void gen_lxvw4x(DisasContext *ctx)
>>  }
>>  gen_set_access_type(ctx, ACCESS_INT);
>>  EA = tcg_temp_new();
>> -tmp = tcg_temp_new_i64();
>>  
>>  gen_addr_reg_index(ctx, EA);
>> -gen_qemu_ld32u_i64(ctx, tmp, EA);
>> -tcg_gen_addi_tl(EA, EA, 4);
>> -gen_qemu_ld32u_i64(ctx, xth, EA);
>> -tcg_gen_deposit_i64(xth, xth, tmp, 32, 32);
>> -
>> -tcg_gen_addi_tl(EA, EA, 4);
>> -gen_qemu_ld32u_i64(ctx, tmp, EA);
>> -tcg_gen_addi_tl(EA, EA, 4);
>> -gen_qemu_ld32u_i64(ctx, xtl, EA);
>> -tcg_gen_deposit_i64(xtl, xtl, tmp, 32, 32);
>> -
>> +if (ctx->le_mode) {
>> +t0 = tcg_temp_new_i64();
>> +t1 = tcg_temp_new_i64();
>> +tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ);
>> +tcg_gen_shri_i64(t1, t0, 32);
>> +tcg_gen_deposit_i64(xth, t1, t0, 32, 32);
>> +tcg_gen_addi_tl(EA, EA, 8);
>> +tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ);
>> +tcg_gen_shri_i64(t1, t0, 32);
>> +tcg_gen_deposit_i64(xtl, t1, t0, 32, 32);
>> +tcg_temp_free_i64(t0);
>> +tcg_temp_free_i64(t1);
>> +} else {
>> +tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ);
>> +gen_helper_bswap32x2(xth, xth);
>> +tcg_gen_addi_tl(EA, EA, 8);
>> +tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ);
>> +gen_helper_bswap32x2(xtl, xtl);
>> +}
>
> So.. for starters using a helper for just one endianness is kind of
> ugly.  But for going on with, it looks like the two paths are doing
> the same thing, just one is doing it with TCG ops and the other via a
> helper.

Right, while iterating and optimizing logic for BE, I got it same as LE ;-)
We can just use BE. Following is good enough for both BE/LE.

+tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ);
+gen_helper_bswap32x2(xth, xth);
+tcg_gen_addi_tl(EA, EA, 8);
+tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ);
+gen_helper_bswap32x2(xtl, xtl);

Regards,
Nikunj




Re: [Qemu-devel] [PATCH RESEND v2 14/17] target-ppc: improve lxvw4x implementation

2016-09-14 Thread David Gibson
On Mon, Sep 12, 2016 at 12:11:43PM +0530, Nikunj A Dadhania wrote:
> Load 8byte at a time and manipulate.
> 
> Signed-off-by: Nikunj A Dadhania 
> ---
>  target-ppc/helper.h |  1 +
>  target-ppc/mem_helper.c |  5 +
>  target-ppc/translate/vsx-impl.inc.c | 34 --
>  3 files changed, 26 insertions(+), 14 deletions(-)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 966f2ce..1bbeac4 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -297,6 +297,7 @@ DEF_HELPER_2(mtvscr, void, env, avr)
>  DEF_HELPER_3(lvebx, void, env, avr, tl)
>  DEF_HELPER_3(lvehx, void, env, avr, tl)
>  DEF_HELPER_3(lvewx, void, env, avr, tl)
> +DEF_HELPER_1(bswap32x2, i64, i64)
>  DEF_HELPER_3(stvebx, void, env, avr, tl)
>  DEF_HELPER_3(stvehx, void, env, avr, tl)
>  DEF_HELPER_3(stvewx, void, env, avr, tl)
> diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
> index 6548715..a56051a 100644
> --- a/target-ppc/mem_helper.c
> +++ b/target-ppc/mem_helper.c
> @@ -285,6 +285,11 @@ STVE(stvewx, cpu_stl_data_ra, bswap32, u32)
>  #undef I
>  #undef LVE
>  
> +uint64_t helper_bswap32x2(uint64_t x)
> +{
> +return deposit64((x >> 32), 32, 32, (x));
> +}
> +
>  #undef HI_IDX
>  #undef LO_IDX
>  
> diff --git a/target-ppc/translate/vsx-impl.inc.c 
> b/target-ppc/translate/vsx-impl.inc.c
> index eee6052..e3374df 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -75,7 +75,7 @@ static void gen_lxvdsx(DisasContext *ctx)
>  static void gen_lxvw4x(DisasContext *ctx)
>  {
>  TCGv EA;
> -TCGv_i64 tmp;
> +TCGv_i64 t0, t1;
>  TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
>  TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
>  if (unlikely(!ctx->vsx_enabled)) {
> @@ -84,22 +84,28 @@ static void gen_lxvw4x(DisasContext *ctx)
>  }
>  gen_set_access_type(ctx, ACCESS_INT);
>  EA = tcg_temp_new();
> -tmp = tcg_temp_new_i64();
>  
>  gen_addr_reg_index(ctx, EA);
> -gen_qemu_ld32u_i64(ctx, tmp, EA);
> -tcg_gen_addi_tl(EA, EA, 4);
> -gen_qemu_ld32u_i64(ctx, xth, EA);
> -tcg_gen_deposit_i64(xth, xth, tmp, 32, 32);
> -
> -tcg_gen_addi_tl(EA, EA, 4);
> -gen_qemu_ld32u_i64(ctx, tmp, EA);
> -tcg_gen_addi_tl(EA, EA, 4);
> -gen_qemu_ld32u_i64(ctx, xtl, EA);
> -tcg_gen_deposit_i64(xtl, xtl, tmp, 32, 32);
> -
> +if (ctx->le_mode) {
> +t0 = tcg_temp_new_i64();
> +t1 = tcg_temp_new_i64();
> +tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ);
> +tcg_gen_shri_i64(t1, t0, 32);
> +tcg_gen_deposit_i64(xth, t1, t0, 32, 32);
> +tcg_gen_addi_tl(EA, EA, 8);
> +tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ);
> +tcg_gen_shri_i64(t1, t0, 32);
> +tcg_gen_deposit_i64(xtl, t1, t0, 32, 32);
> +tcg_temp_free_i64(t0);
> +tcg_temp_free_i64(t1);
> +} else {
> +tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ);
> +gen_helper_bswap32x2(xth, xth);
> +tcg_gen_addi_tl(EA, EA, 8);
> +tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ);
> +gen_helper_bswap32x2(xtl, xtl);
> +}

So.. for starters using a helper for just one endianness is kind of
ugly.  But for going on with, it looks like the two paths are doing
the same thing, just one is doing it with TCG ops and the other via a
helper.

>  tcg_temp_free(EA);
> -tcg_temp_free_i64(tmp);
>  }
>  
>  #define VSX_STORE_SCALAR(name, operation) \

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature