Re: [Qemu-devel] [PATCH 1/4] target-ppc: Implement bcdcfsq. instruction

2016-11-17 Thread David Gibson
On Thu, Nov 17, 2016 at 03:31:48PM -0200, jos...@linux.vnet.ibm.com wrote:
> Hello David,
> 
> Thank you for your review. I have just one question below, could you
> help me to address it please?
> 
> Thank you!
> 
> Ziviani
> 
> On Thu, Nov 17, 2016 at 02:42:43PM +1100, David Gibson wrote:
> > On Wed, Nov 16, 2016 at 06:07:27PM -0200, Jose Ricardo Ziviani wrote:
> > > bcdcfsq.: Decimal convert from signed quadword. It is possible to
> > 
> > I think there should be a "not" in there.
> > 
> > > convert values less than 10^31-1 or greater than -10^31-1 to be
> > > represented in packed decimal format.
> > > 
> > > Signed-off-by: Jose Ricardo Ziviani 
> > > ---
> > >  target-ppc/helper.h |  1 +
> > >  target-ppc/int_helper.c | 48 
> > > +
> > >  target-ppc/translate/vmx-impl.inc.c |  7 ++
> > >  3 files changed, 56 insertions(+)
> > > 
> > > diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> > > index da00f0a..87f533c 100644
> > > --- a/target-ppc/helper.h
> > > +++ b/target-ppc/helper.h
> > > @@ -382,6 +382,7 @@ DEF_HELPER_3(bcdcfn, i32, avr, avr, i32)
> > >  DEF_HELPER_3(bcdctn, i32, avr, avr, i32)
> > >  DEF_HELPER_3(bcdcfz, i32, avr, avr, i32)
> > >  DEF_HELPER_3(bcdctz, i32, avr, avr, i32)
> > > +DEF_HELPER_3(bcdcfsq, i32, avr, avr, i32)
> > >  
> > >  DEF_HELPER_2(xsadddp, void, env, i32)
> > >  DEF_HELPER_2(xssubdp, void, env, i32)
> > > diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> > > index 9ac204a..db65a51 100644
> > > --- a/target-ppc/int_helper.c
> > > +++ b/target-ppc/int_helper.c
> > > @@ -2874,6 +2874,54 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, 
> > > uint32_t ps)
> > >  return cr;
> > >  }
> > >  
> > > +uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
> > > +{
> > > +int i;
> > > +int cr = 0;
> > > +int ox_flag = 0;
> > > +uint64_t digit = 0;
> > > +uint64_t carry = 0;
> > > +uint64_t lo_value = 0;
> > > +uint64_t hi_value = 0;
> > 
> > Most of the variables above don't need initializers.
> > 
> > > +uint64_t max = ULLONG_MAX;
> > > +ppc_avr_t ret = { .u64 = { 0, 0 } };
> > > +
> > > +if (b->s64[HI_IDX] < 0) {
> > > +hi_value = -b->s64[HI_IDX];
> > > +lo_value = b->s64[LO_IDX];
> > 
> > I'm pretty sure this is wrong.  Take for example 128-bit -1:
> >    
> > Upper word is negative (64-bit -1), so
> > hi_value =  0001
> > lo_value =  
> > 
> > 0x1   != +1
> > 
> > > +bcd_put_digit(, 0xD, 0);
> > > +} else if (b->s64[HI_IDX] == 0 && b->s64[LO_IDX] < 0) {
> > > +lo_value = -b->s64[LO_IDX];
> > > +bcd_put_digit(, 0xD, 0);
> > > +} else {
> > > +hi_value = b->s64[HI_IDX];
> > > +lo_value = b->s64[LO_IDX];
> > > +bcd_put_digit(, bcd_preferred_sgn(0, ps), 0);
> > > +}
> > > +
> > > +if (unlikely(hi_value > 0x7e37be2022)) {
> > 
> > This doesn't look right.  Unless by chance 10^31-1 is equal to (k*2^64
> > - 1) you need to look at the lo_value as well.
> > 
> > > +ox_flag = 1;
> > 
> > You might as well just return 1<< CRF_SO here - no point actually
> > computing a meaningless value.
> > 
> > > +}
> > > +
> > > +carry = hi_value;
> > > +for (i = 0; i < 32; i++, max /= 10, lo_value /= 10) {
> > 
> > Looks like this loop has one too many iterations - there are 32
> > iterations, but you only have 31 digits.
> > 
> > > +digit = ((max % 10) * hi_value) + (lo_value % 10) + carry;
> > > +carry = (digit > 9) ? digit / 10 : 0;
> > > +
> > > +bcd_put_digit(, (carry) ? digit % 10 : digit, i + 1);
> > 
> > Ugh, this is hard to follow.  We're already using an Int128 library in
> > the memory region code; wonder if we should just use that here as well.
> > 
> 
> today we have divu128 (host-utils.h) but it doesn't work for me because
> it coerces the 128bits result in a 64bits variable:
> 
> __uint128_t result = dividend / divisor;
> *plow = result;  // -> plow is an uint64_t
> 
> So I wonder if you suggest me (like the idea) to implement div and mod
> in Int128 lib. Something like:
> 
> static inline Int128 int128_div64(Int128 a, uint64_t b);
> static inline Int128 int128_mod64(Int128 a, uint64_t b);

*thinks*  no.. it's simpler than that.

You can use divu128() to divide your input by 10^15, and it gives you
both the dividend and the remainder.  The dividend gives you the value
you need in the upper BCD word, and the remainder gives you the value
needed in the lower BCD word.  If the dividend doesn't fit in 64-bits,
you'ver overflowed (bceause the original value was > 2^64*10^15 which
is greater than 10^31)

Then you can use regular 64-bit arithmetic to split up those two
halves into individual digits.

> But, anyway, these functions would have to implement those hi/lo
> multiplications for the case 

Re: [Qemu-devel] [PATCH 1/4] target-ppc: Implement bcdcfsq. instruction

2016-11-17 Thread joserz
Hello David,

Thank you for your review. I have just one question below, could you
help me to address it please?

Thank you!

Ziviani

On Thu, Nov 17, 2016 at 02:42:43PM +1100, David Gibson wrote:
> On Wed, Nov 16, 2016 at 06:07:27PM -0200, Jose Ricardo Ziviani wrote:
> > bcdcfsq.: Decimal convert from signed quadword. It is possible to
> 
> I think there should be a "not" in there.
> 
> > convert values less than 10^31-1 or greater than -10^31-1 to be
> > represented in packed decimal format.
> > 
> > Signed-off-by: Jose Ricardo Ziviani 
> > ---
> >  target-ppc/helper.h |  1 +
> >  target-ppc/int_helper.c | 48 
> > +
> >  target-ppc/translate/vmx-impl.inc.c |  7 ++
> >  3 files changed, 56 insertions(+)
> > 
> > diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> > index da00f0a..87f533c 100644
> > --- a/target-ppc/helper.h
> > +++ b/target-ppc/helper.h
> > @@ -382,6 +382,7 @@ DEF_HELPER_3(bcdcfn, i32, avr, avr, i32)
> >  DEF_HELPER_3(bcdctn, i32, avr, avr, i32)
> >  DEF_HELPER_3(bcdcfz, i32, avr, avr, i32)
> >  DEF_HELPER_3(bcdctz, i32, avr, avr, i32)
> > +DEF_HELPER_3(bcdcfsq, i32, avr, avr, i32)
> >  
> >  DEF_HELPER_2(xsadddp, void, env, i32)
> >  DEF_HELPER_2(xssubdp, void, env, i32)
> > diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> > index 9ac204a..db65a51 100644
> > --- a/target-ppc/int_helper.c
> > +++ b/target-ppc/int_helper.c
> > @@ -2874,6 +2874,54 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, 
> > uint32_t ps)
> >  return cr;
> >  }
> >  
> > +uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
> > +{
> > +int i;
> > +int cr = 0;
> > +int ox_flag = 0;
> > +uint64_t digit = 0;
> > +uint64_t carry = 0;
> > +uint64_t lo_value = 0;
> > +uint64_t hi_value = 0;
> 
> Most of the variables above don't need initializers.
> 
> > +uint64_t max = ULLONG_MAX;
> > +ppc_avr_t ret = { .u64 = { 0, 0 } };
> > +
> > +if (b->s64[HI_IDX] < 0) {
> > +hi_value = -b->s64[HI_IDX];
> > +lo_value = b->s64[LO_IDX];
> 
> I'm pretty sure this is wrong.  Take for example 128-bit -1:
>      
> Upper word is negative (64-bit -1), so
>   hi_value =  0001
>   lo_value =  
> 
> 0x1   != +1
> 
> > +bcd_put_digit(, 0xD, 0);
> > +} else if (b->s64[HI_IDX] == 0 && b->s64[LO_IDX] < 0) {
> > +lo_value = -b->s64[LO_IDX];
> > +bcd_put_digit(, 0xD, 0);
> > +} else {
> > +hi_value = b->s64[HI_IDX];
> > +lo_value = b->s64[LO_IDX];
> > +bcd_put_digit(, bcd_preferred_sgn(0, ps), 0);
> > +}
> > +
> > +if (unlikely(hi_value > 0x7e37be2022)) {
> 
> This doesn't look right.  Unless by chance 10^31-1 is equal to (k*2^64
> - 1) you need to look at the lo_value as well.
> 
> > +ox_flag = 1;
> 
> You might as well just return 1<< CRF_SO here - no point actually
> computing a meaningless value.
> 
> > +}
> > +
> > +carry = hi_value;
> > +for (i = 0; i < 32; i++, max /= 10, lo_value /= 10) {
> 
> Looks like this loop has one too many iterations - there are 32
> iterations, but you only have 31 digits.
> 
> > +digit = ((max % 10) * hi_value) + (lo_value % 10) + carry;
> > +carry = (digit > 9) ? digit / 10 : 0;
> > +
> > +bcd_put_digit(, (carry) ? digit % 10 : digit, i + 1);
> 
> Ugh, this is hard to follow.  We're already using an Int128 library in
> the memory region code; wonder if we should just use that here as well.
> 

today we have divu128 (host-utils.h) but it doesn't work for me because
it coerces the 128bits result in a 64bits variable:

__uint128_t result = dividend / divisor;
*plow = result;  // -> plow is an uint64_t

So I wonder if you suggest me (like the idea) to implement div and mod
in Int128 lib. Something like:

static inline Int128 int128_div64(Int128 a, uint64_t b);
static inline Int128 int128_mod64(Int128 a, uint64_t b);

But, anyway, these functions would have to implement those hi/lo
multiplications for the case "!CONFIG_INT128".


> > +}
> > +
> > +cr = bcd_cmp_zero();
> > +
> > +if (unlikely(ox_flag)) {
> > +cr |= 1 << CRF_SO;
> > +}
> > +
> > +*r = ret;
> > +
> > +return cr;
> > +}
> > +
> >  void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
> >  {
> >  int i;
> > diff --git a/target-ppc/translate/vmx-impl.inc.c 
> > b/target-ppc/translate/vmx-impl.inc.c
> > index 7143eb3..36141e5 100644
> > --- a/target-ppc/translate/vmx-impl.inc.c
> > +++ b/target-ppc/translate/vmx-impl.inc.c
> > @@ -989,10 +989,14 @@ GEN_BCD2(bcdcfn)
> >  GEN_BCD2(bcdctn)
> >  GEN_BCD2(bcdcfz)
> >  GEN_BCD2(bcdctz)
> > +GEN_BCD2(bcdcfsq)
> >  
> >  static void gen_xpnd04_1(DisasContext *ctx)
> >  {
> >  switch (opc4(ctx->opcode)) {
> > +case 2:
> > +gen_bcdcfsq(ctx);
> > +break;
> >  case 4:
> >  

Re: [Qemu-devel] [PATCH 1/4] target-ppc: Implement bcdcfsq. instruction

2016-11-16 Thread David Gibson
On Wed, Nov 16, 2016 at 06:07:27PM -0200, Jose Ricardo Ziviani wrote:
> bcdcfsq.: Decimal convert from signed quadword. It is possible to

I think there should be a "not" in there.

> convert values less than 10^31-1 or greater than -10^31-1 to be
> represented in packed decimal format.
> 
> Signed-off-by: Jose Ricardo Ziviani 
> ---
>  target-ppc/helper.h |  1 +
>  target-ppc/int_helper.c | 48 
> +
>  target-ppc/translate/vmx-impl.inc.c |  7 ++
>  3 files changed, 56 insertions(+)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index da00f0a..87f533c 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -382,6 +382,7 @@ DEF_HELPER_3(bcdcfn, i32, avr, avr, i32)
>  DEF_HELPER_3(bcdctn, i32, avr, avr, i32)
>  DEF_HELPER_3(bcdcfz, i32, avr, avr, i32)
>  DEF_HELPER_3(bcdctz, i32, avr, avr, i32)
> +DEF_HELPER_3(bcdcfsq, i32, avr, avr, i32)
>  
>  DEF_HELPER_2(xsadddp, void, env, i32)
>  DEF_HELPER_2(xssubdp, void, env, i32)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 9ac204a..db65a51 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -2874,6 +2874,54 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, 
> uint32_t ps)
>  return cr;
>  }
>  
> +uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
> +{
> +int i;
> +int cr = 0;
> +int ox_flag = 0;
> +uint64_t digit = 0;
> +uint64_t carry = 0;
> +uint64_t lo_value = 0;
> +uint64_t hi_value = 0;

Most of the variables above don't need initializers.

> +uint64_t max = ULLONG_MAX;
> +ppc_avr_t ret = { .u64 = { 0, 0 } };
> +
> +if (b->s64[HI_IDX] < 0) {
> +hi_value = -b->s64[HI_IDX];
> +lo_value = b->s64[LO_IDX];

I'm pretty sure this is wrong.  Take for example 128-bit -1:
   
Upper word is negative (64-bit -1), so
hi_value =  0001
lo_value =  

0x1   != +1

> +bcd_put_digit(, 0xD, 0);
> +} else if (b->s64[HI_IDX] == 0 && b->s64[LO_IDX] < 0) {
> +lo_value = -b->s64[LO_IDX];
> +bcd_put_digit(, 0xD, 0);
> +} else {
> +hi_value = b->s64[HI_IDX];
> +lo_value = b->s64[LO_IDX];
> +bcd_put_digit(, bcd_preferred_sgn(0, ps), 0);
> +}
> +
> +if (unlikely(hi_value > 0x7e37be2022)) {

This doesn't look right.  Unless by chance 10^31-1 is equal to (k*2^64
- 1) you need to look at the lo_value as well.

> +ox_flag = 1;

You might as well just return 1<< CRF_SO here - no point actually
computing a meaningless value.

> +}
> +
> +carry = hi_value;
> +for (i = 0; i < 32; i++, max /= 10, lo_value /= 10) {

Looks like this loop has one too many iterations - there are 32
iterations, but you only have 31 digits.

> +digit = ((max % 10) * hi_value) + (lo_value % 10) + carry;
> +carry = (digit > 9) ? digit / 10 : 0;
> +
> +bcd_put_digit(, (carry) ? digit % 10 : digit, i + 1);

Ugh, this is hard to follow.  We're already using an Int128 library in
the memory region code; wonder if we should just use that here as well.

> +}
> +
> +cr = bcd_cmp_zero();
> +
> +if (unlikely(ox_flag)) {
> +cr |= 1 << CRF_SO;
> +}
> +
> +*r = ret;
> +
> +return cr;
> +}
> +
>  void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
>  {
>  int i;
> diff --git a/target-ppc/translate/vmx-impl.inc.c 
> b/target-ppc/translate/vmx-impl.inc.c
> index 7143eb3..36141e5 100644
> --- a/target-ppc/translate/vmx-impl.inc.c
> +++ b/target-ppc/translate/vmx-impl.inc.c
> @@ -989,10 +989,14 @@ GEN_BCD2(bcdcfn)
>  GEN_BCD2(bcdctn)
>  GEN_BCD2(bcdcfz)
>  GEN_BCD2(bcdctz)
> +GEN_BCD2(bcdcfsq)
>  
>  static void gen_xpnd04_1(DisasContext *ctx)
>  {
>  switch (opc4(ctx->opcode)) {
> +case 2:
> +gen_bcdcfsq(ctx);
> +break;
>  case 4:
>  gen_bcdctz(ctx);
>  break;
> @@ -1014,6 +1018,9 @@ static void gen_xpnd04_1(DisasContext *ctx)
>  static void gen_xpnd04_2(DisasContext *ctx)
>  {
>  switch (opc4(ctx->opcode)) {
> +case 2:
> +gen_bcdcfsq(ctx);
> +break;
>  case 4:
>  gen_bcdctz(ctx);
>  break;

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [PATCH 1/4] target-ppc: Implement bcdcfsq. instruction

2016-11-16 Thread Jose Ricardo Ziviani
bcdcfsq.: Decimal convert from signed quadword. It is possible to
convert values less than 10^31-1 or greater than -10^31-1 to be
represented in packed decimal format.

Signed-off-by: Jose Ricardo Ziviani 
---
 target-ppc/helper.h |  1 +
 target-ppc/int_helper.c | 48 +
 target-ppc/translate/vmx-impl.inc.c |  7 ++
 3 files changed, 56 insertions(+)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index da00f0a..87f533c 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -382,6 +382,7 @@ DEF_HELPER_3(bcdcfn, i32, avr, avr, i32)
 DEF_HELPER_3(bcdctn, i32, avr, avr, i32)
 DEF_HELPER_3(bcdcfz, i32, avr, avr, i32)
 DEF_HELPER_3(bcdctz, i32, avr, avr, i32)
+DEF_HELPER_3(bcdcfsq, i32, avr, avr, i32)
 
 DEF_HELPER_2(xsadddp, void, env, i32)
 DEF_HELPER_2(xssubdp, void, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 9ac204a..db65a51 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2874,6 +2874,54 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, 
uint32_t ps)
 return cr;
 }
 
+uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
+{
+int i;
+int cr = 0;
+int ox_flag = 0;
+uint64_t digit = 0;
+uint64_t carry = 0;
+uint64_t lo_value = 0;
+uint64_t hi_value = 0;
+uint64_t max = ULLONG_MAX;
+ppc_avr_t ret = { .u64 = { 0, 0 } };
+
+if (b->s64[HI_IDX] < 0) {
+hi_value = -b->s64[HI_IDX];
+lo_value = b->s64[LO_IDX];
+bcd_put_digit(, 0xD, 0);
+} else if (b->s64[HI_IDX] == 0 && b->s64[LO_IDX] < 0) {
+lo_value = -b->s64[LO_IDX];
+bcd_put_digit(, 0xD, 0);
+} else {
+hi_value = b->s64[HI_IDX];
+lo_value = b->s64[LO_IDX];
+bcd_put_digit(, bcd_preferred_sgn(0, ps), 0);
+}
+
+if (unlikely(hi_value > 0x7e37be2022)) {
+ox_flag = 1;
+}
+
+carry = hi_value;
+for (i = 0; i < 32; i++, max /= 10, lo_value /= 10) {
+digit = ((max % 10) * hi_value) + (lo_value % 10) + carry;
+carry = (digit > 9) ? digit / 10 : 0;
+
+bcd_put_digit(, (carry) ? digit % 10 : digit, i + 1);
+}
+
+cr = bcd_cmp_zero();
+
+if (unlikely(ox_flag)) {
+cr |= 1 << CRF_SO;
+}
+
+*r = ret;
+
+return cr;
+}
+
 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
 {
 int i;
diff --git a/target-ppc/translate/vmx-impl.inc.c 
b/target-ppc/translate/vmx-impl.inc.c
index 7143eb3..36141e5 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -989,10 +989,14 @@ GEN_BCD2(bcdcfn)
 GEN_BCD2(bcdctn)
 GEN_BCD2(bcdcfz)
 GEN_BCD2(bcdctz)
+GEN_BCD2(bcdcfsq)
 
 static void gen_xpnd04_1(DisasContext *ctx)
 {
 switch (opc4(ctx->opcode)) {
+case 2:
+gen_bcdcfsq(ctx);
+break;
 case 4:
 gen_bcdctz(ctx);
 break;
@@ -1014,6 +1018,9 @@ static void gen_xpnd04_1(DisasContext *ctx)
 static void gen_xpnd04_2(DisasContext *ctx)
 {
 switch (opc4(ctx->opcode)) {
+case 2:
+gen_bcdcfsq(ctx);
+break;
 case 4:
 gen_bcdctz(ctx);
 break;
-- 
2.7.4