Hi,

On Mon, 18 Nov 2013, Claudio Fontana wrote:

> >> +    case 3:
> >> +        tcg_gen_rotr_i64(r, cpu_reg(reg), tcg_shift);
> >> +        break;
> > 
> > Incorrect rotate for 32bit?

32bit rotates and shifts were fixed in a patch later than the 60er series 
Alex posted.  See attached.  (Generally there are many fixes to emulated 
instructions in that branch)

> >> +    if (!shift_amount && source == 0x1f) {
> 
> Besides the comment, is this correct?

No, it needs to check for opc == 1.

> >> +    tcg_dest = cpu_reg(dest);
> >> +    switch (opc) {
> >> +    case 0x0:
> >> +    case 0x3:
> >> +        tcg_gen_and_i64(tcg_dest, cpu_reg(source), tcg_op2);
> >> +        break;
> >> +    case 0x1:
> >> +        tcg_gen_or_i64(tcg_dest, cpu_reg(source), tcg_op2);
> >> +        break;
> >> +    case 0x2:
> >> +        tcg_gen_xor_i64(tcg_dest, cpu_reg(source), tcg_op2);
> >> +        break;
> >> +    }
> >> +
> >> +    if (is_32bit) {
> >> +        tcg_gen_ext32u_i64(tcg_dest, tcg_dest);
> >> +    }
> >> +
> >> +    if (setflags) {
> >> +        gen_helper_pstate_add(pstate, pstate, tcg_dest, cpu_reg(31), 
> >> tcg_dest);
> >> +    }
> > 
> > Incorrect flags generated.  They're different between add/sub and logical.
> > In particular, C and V are always zero.

That's done correctly with the fixed pstate helpers coming with a later 
patch (see attached as well).  reg31 is zero, so that's flags as if for 
"dest == dest + 0", and PSTATE_C and PSTATE_V will be zero.  That is, the 
logical flags are the same as the arithmetic flags for result plus zero 
with no carry_in.


Ciao,
Michael.
From df54486da31d6329696effa61096eda5ab85395a Mon Sep 17 00:00:00 2001
From: Michael Matz <m...@suse.de>
Date: Sun, 24 Mar 2013 02:52:42 +0100
Subject: [PATCH] Fix 32bit rotates.

The 32bit shifts generally weren't careful with the upper parts,
either bits could leak in (for right shift) or leak or (for left shift).
And rotate was completely off, rotating around bit 63, not 31.
This fixes the CAST5 hash algorithm.
---
 target-arm/translate-a64.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 96dc281..e3941a1 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -596,25 +596,49 @@ static TCGv_i64 get_shift(int reg, int shift_type, TCGv_i64 tcg_shift,
     r = tcg_temp_new_i64();
 
     /* XXX carry_out */
+    /* Careful with the width.  We work on 64bit, but must make sure
+       that we zero-extend the result on out, and ignore any upper bits,
+       that might still be set in that register.  */
     switch (shift_type) {
     case 0: /* LSL */
+	/* left shift is easy, simply zero-extend on out */
         tcg_gen_shl_i64(r, cpu_reg(reg), tcg_shift);
+	if (is_32bit)
+	  tcg_gen_ext32u_i64 (r, r);
         break;
     case 1: /* LSR */
-        tcg_gen_shr_i64(r, cpu_reg(reg), tcg_shift);
+	/* For logical right shift we zero extend first, to zero
+	   the upper bits.  We don't need to extend on out.  */
+	if (is_32bit) {
+	    tcg_gen_ext32u_i64 (r, cpu_reg(reg));
+	    tcg_gen_shr_i64 (r, r, tcg_shift);
+	} else
+	  tcg_gen_shr_i64(r, cpu_reg(reg), tcg_shift);
         break;
     case 2: /* ASR */
+	/* For arithmetic right shift we sign extend first, then shift,
+	   and then need to clear the upper bits again.  */
         if (is_32bit) {
             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
             tcg_gen_ext32s_i64(tcg_tmp, cpu_reg(reg));
             tcg_gen_sar_i64(r, tcg_tmp, tcg_shift);
+	    tcg_gen_ext32u_i64 (r, r);
             tcg_temp_free_i64(tcg_tmp);
         } else {
             tcg_gen_sar_i64(r, cpu_reg(reg), tcg_shift);
         }
         break;
-    case 3:
-        tcg_gen_rotr_i64(r, cpu_reg(reg), tcg_shift);
+    case 3: /* ROR */
+	/* For rotation extending doesn't help, we really have to use
+	   a 32bit rotate.  */
+	if (is_32bit) {
+	    TCGv_i32 tmp = tcg_temp_new_i32();
+            tcg_gen_trunc_i64_i32(tmp, cpu_reg(reg));
+	    tcg_gen_rotr_i32(tmp, tmp, tcg_shift);
+            tcg_gen_extu_i32_i64(r, tmp);
+            tcg_temp_free_i32(tmp);
+	} else
+	  tcg_gen_rotr_i64(r, cpu_reg(reg), tcg_shift);
         break;
     }
 
-- 
1.8.1.4

From 33137f8a660750d7d8598c7e467f4ccc8dc5ef85 Mon Sep 17 00:00:00 2001
From: Michael Matz <m...@suse.de>
Date: Sat, 23 Mar 2013 04:53:44 +0100
Subject: [PATCH] Fix the pstate flags helpers

ADCS and SBCS/SUBS sometimes gave the wrong results
for the C and V flags.  This fixes it.
---
 target-arm/helper-a64.c | 52 ++++++++++++-------------------------------------
 1 file changed, 12 insertions(+), 40 deletions(-)

diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 4375bf0..4fcb09b 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -7,8 +7,6 @@
 
 uint32_t HELPER(pstate_add)(uint32_t pstate, uint64_t a1, uint64_t a2, uint64_t ar)
 {
-    int64_t s1 = a1;
-    int64_t s2 = a2;
     int64_t sr = ar;
 
     pstate &= ~(PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V);
@@ -21,11 +19,15 @@ uint32_t HELPER(pstate_add)(uint32_t pstate, uint64_t a1, uint64_t a2, uint64_t
         pstate |= PSTATE_Z;
     }
 
-    if (ar && (ar < a1)) {
+    if (ar < a1) {
         pstate |= PSTATE_C;
+    } else if (ar != (a1 + a2) && ar == a1) {
+	/* If result isn't what we expect it must be because we added
+	   in some carry.  If so we also produce a carry when ar == a1. */
+	pstate |= PSTATE_C;
     }
 
-    if ((s1 > 0 && s2 > 0 && sr < 0) || (s1 < 0 && s2 < 0 && sr > 0)) {
+    if ((int64_t)((a1 ^ a2 ^ -1) & (a1 ^ ar)) < 0) {
         pstate |= PSTATE_V;
     }
 
@@ -38,8 +40,6 @@ uint32_t HELPER(pstate_add32)(uint32_t pstate, uint64_t x1, uint64_t x2, uint64_
     uint32_t a2 = x2;
     uint32_t ar = xr;
 
-    int32_t s1 = a1;
-    int32_t s2 = a2;
     int32_t sr = ar;
 
     pstate &= ~(PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V);
@@ -52,11 +52,13 @@ uint32_t HELPER(pstate_add32)(uint32_t pstate, uint64_t x1, uint64_t x2, uint64_
         pstate |= PSTATE_Z;
     }
 
-    if (ar && (ar < a1)) {
+    if (ar < a1) {
         pstate |= PSTATE_C;
+    } else if (ar != (a1 + a2) && ar == a1) {
+	pstate |= PSTATE_C;
     }
 
-    if ((s1 > 0 && s2 > 0 && sr < 0) || (s1 < 0 && s2 < 0 && sr > 0)) {
+    if ((int32_t)((a1 ^ a2 ^ -1) & (a1 ^ ar)) < 0) {
         pstate |= PSTATE_V;
     }
 
@@ -65,23 +67,7 @@ uint32_t HELPER(pstate_add32)(uint32_t pstate, uint64_t x1, uint64_t x2, uint64_
 
 uint32_t HELPER(pstate_sub)(uint32_t pstate, uint64_t a1, uint64_t a2, uint64_t ar)
 {
-    int64_t sr = ar;
-    int64_t s1 = a1;
-    int64_t s2 = a2;
-
-    pstate = helper_pstate_add(pstate, a1, a2, ar);
-
-    pstate &= ~(PSTATE_C | PSTATE_V);
-
-    if (a2 <= a1) {
-        pstate |= PSTATE_C;
-    }
-
-    /* XXX check if this is the only special case */
-    if ((!a1 && a2 == 0x8000000000000000ULL) || (s1 > 0 && s2 < 0 && sr < 0) || (s1 < 0 && s2 > 0 && sr > 0)) {
-        pstate |= PSTATE_V;
-    }
-
+    pstate = helper_pstate_add(pstate, a1, ~a2, ar);
     return pstate;
 }
 
@@ -90,22 +76,8 @@ uint32_t HELPER(pstate_sub32)(uint32_t pstate, uint64_t x1, uint64_t x2, uint64_
     uint32_t a1 = x1;
     uint32_t a2 = x2;
     uint32_t ar = xr;
-    int32_t sr = ar;
-    int32_t s1 = a1;
-    int32_t s2 = a2;
-
-    pstate = helper_pstate_add32(pstate, a1, a2, ar);
-
-    pstate &= ~(PSTATE_C | PSTATE_V);
-
-    if (a2 <= a1) {
-        pstate |= PSTATE_C;
-    }
-
-    if ((!a1 && a2 == 0x80000000ULL) || (s1 > 0 && s2 < 0 && sr < 0) || (s1 < 0 && s2 > 0 && sr > 0)) {
-        pstate |= PSTATE_V;
-    }
 
+    pstate = helper_pstate_add32(pstate, a1, ~a2, ar);
     return pstate;
 }
 
-- 
1.8.1.4

Reply via email to