Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-29 Thread Alexander Graf

On 28.03.2011, at 19:55, Peter Maydell wrote:

 On 28 March 2011 18:23, Alexander Graf ag...@suse.de wrote:
 On 03/24/2011 06:29 PM, Peter Maydell wrote:
 +/* condition codes for binary FP ops */
 +static uint32_t set_cc_f32(float32 v1, float32 v2)
 +{
 +if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
 +return 3;
 +} else if (float32_eq(v1, v2,env-fpu_status)) {
 +return 0;
 +} else if (float32_lt(v1, v2,env-fpu_status)) {
 +return 1;
 +} else {
 +return 2;
 +}
 +}
 
 Can you not use float32_compare_quiet() (returns a value
 telling you if it's less/equal/greater/unordered)?
 If not, needs a comment saying why you need to do it the hard way.
 
 I just checked the macros there and it looks like float32_compare_quiet
 returns eq when both numbers are NaN.
 
 Hmm?
 
if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) \
 extractFloat ## s ## Frac( a ) ) || \
( ( extractFloat ## s ## Exp( b ) == nan_exp ) \
  extractFloat ## s ## Frac( b ) )) {\
if (!is_quiet || \
float ## s ## _is_signaling_nan( a ) ||  \
float ## s ## _is_signaling_nan( b ) ) { \
float_raise( float_flag_invalid STATUS_VAR); \
}\
return float_relation_unordered; \
}\
 
 If A is a NaN (ie its exponent is nan_exp and the frac bits aren't zero)
 or B is a NaN then we return float_relation_unordered.
 
 We would still have to convert from
 the return value from that over to a CC value. I honestly don't see any
 benefit - the code doesn't become cleaner or smaller.
 
 So you get
 static uint32_t set_cc_f32(float32 v1, float32 v2)
 {
switch (float32_compare_quiet(v1, v2, env-fpu_status)) {
case float_relation_unordered:
return 3;
case float_relation_equal:
return 0;
case float_relation_less:
return 1;
case float_relation_greater:
return 2;
case float_relation_unordered:
return 3;
}
 }
 
 (and you probably want to put the conversion switch into a function
 since you'll be using it several times.)
 
 Which I think is pretty straightforward, cleaner because we only
 call one softfloat function rather than several, and should be
 faster too (we get to avoid repeating a pile of tedious bit manipulation
 in the eq and lt functions).

Alrighty, changed the code :).

 
 +/* load 128-bit FP zero */
 +void HELPER(lzxr)(uint32_t f1)
 +{
 +CPU_QuadU x;
 +x.q = float64_to_float128(float64_zero,env-fpu_status);
 
 Yuck. Just define a float128_zero if we need one.
 
 Good point. Mind to do so? I find myself struggling with the code there.
 
 We could just follow the pattern of  float128_default_nan_{low,high}
 in softfloat.h:
 
 #define float128_zero_low LIT64(0)
 #define float128_zero_high LIT64(0)
 
 then your function has:
 x.q.high = float128_zero_high;
 x.q.low = float128_zero_low;
 
 Or we could do something with an expression that returns a
 struct type; that would be cleaner. I think the default nan
 code is assuming it might have to be compiled with something
 other than gcc. However I forget the C syntax and have to go
 home now :-)

I'll just leave it to you for a follow-up patch :). The less I have to touch in 
softfpu code, the better :)


Alex




Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-28 Thread Alexander Graf

On 03/24/2011 06:29 PM, Peter Maydell wrote:

On 24 March 2011 15:58, Alexander Grafag...@suse.de  wrote:

This is more random comments in passing than a thorough review; sorry.


+#if HOST_LONG_BITS == 64  defined(__GNUC__)
+/* assuming 64-bit hosts have __uint128_t */
+__uint128_t dividend = (((__uint128_t)env-regs[r1])  64) |
+   (env-regs[r1+1]);
+__uint128_t quotient = dividend / divisor;
+env-regs[r1+1] = quotient;
+__uint128_t remainder = dividend % divisor;
+env-regs[r1] = remainder;
+#else
+/* 32-bit hosts would need special wrapper functionality - just abort 
if
+   we encounter such a case; it's very unlikely anyways. */
+cpu_abort(env, 128 -  64/64 division not implemented\n);
+#endif

...I'm still using a 32 bit system :-)


+/* condition codes for binary FP ops */
+static uint32_t set_cc_f32(float32 v1, float32 v2)
+{
+if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
+return 3;
+} else if (float32_eq(v1, v2,env-fpu_status)) {
+return 0;
+} else if (float32_lt(v1, v2,env-fpu_status)) {
+return 1;
+} else {
+return 2;
+}
+}

Can you not use float32_compare_quiet() (returns a value
telling you if it's less/equal/greater/unordered)?
If not, needs a comment saying why you need to do it the hard way.


I just checked the macros there and it looks like float32_compare_quiet 
returns eq when both numbers are NaN. We would still have to convert 
from the return value from that over to a CC value. I honestly don't see 
any benefit - the code doesn't become cleaner or smaller.


int float32_compare_quiet( float32 a, float32 b STATUS_PARAM )
{
if (isless(a, b)) {
return float_relation_less;
} else if (a == b) {
return float_relation_equal;
} else if (isgreater(a, b)) {
return float_relation_greater;
} else {
return float_relation_unordered;
}
}


so

static uint32_t set_cc_f32(float32 v1, float32 v2)
{
if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
return 3;
} else if (float32_eq(v1, v2, env-fpu_status)) {
return 0;
} else if (float32_lt(v1, v2, env-fpu_status)) {
return 1;
} else {
return 2;
}
}

would become

static uint32_t set_cc_f32(float32 v1, float32 v2)
{
int r;

if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
return 3;
}

r = float32_compare_quiet(v1, v2, env-fpu_status);
switch (r) {
case float_relation_equal:
return 0;
case float_relation_less:
return 1;
default:
return 2;
}
}



+/* negative absolute of 32-bit float */
+uint32_t HELPER(lcebr)(uint32_t f1, uint32_t f2)
+{
+env-fregs[f1].l.upper = float32_sub(float32_zero, env-fregs[f2].l.upper,
+env-fpu_status);
+return set_cc_nz_f32(env-fregs[f1].l.upper);
+}

Google suggests this is wrong:
http://publib.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/DZ9AR006/19.4.10?SHELF=DT=19990630131355CASE=
says for lcebr that:
The sign is inverted for any operand, including a QNaN or SNaN,
without causing an arithmetic exception.

but float32_sub will raise exceptions for NaNs. You want
float32_chs() (and similarly for the other types).


Ah, nice :). Thanks!


+/* convert 64-bit float to 128-bit float */
+uint32_t HELPER(lcxbr)(uint32_t f1, uint32_t f2)

Wrong comment? Looks like another invert-sign op from
the online POO.


Yup, wrong comment and the same as above for chs :).


+/* 128-bit FP compare RR */
+uint32_t HELPER(cxbr)(uint32_t f1, uint32_t f2)
+{
+CPU_QuadU v1;
+v1.ll.upper = env-fregs[f1].ll;
+v1.ll.lower = env-fregs[f1 + 2].ll;
+CPU_QuadU v2;
+v2.ll.upper = env-fregs[f2].ll;
+v2.ll.lower = env-fregs[f2 + 2].ll;
+if (float128_is_any_nan(v1.q) || float128_is_any_nan(v2.q)) {
+return 3;
+} else if (float128_eq(v1.q, v2.q,env-fpu_status)) {
+return 0;
+} else if (float128_lt(v1.q, v2.q,env-fpu_status)) {
+return 1;
+} else {
+return 2;
+}
+}

float128_compare_quiet() again?


See above :)


+/* convert 32-bit float to 64-bit int */
+uint32_t HELPER(cgebr)(uint32_t r1, uint32_t f2, uint32_t m3)
+{
+float32 v2 = env-fregs[f2].l.upper;
+set_round_mode(m3);
+env-regs[r1] = float32_to_int64(v2,env-fpu_status);
+return set_cc_nz_f32(v2);
+}

Should this really be permanently setting the rounding mode
for future instructions as well as for the op it does itself?


IIUC every op that does rounding sets the rounding mode, no?


+/* load 32-bit FP zero */
+void HELPER(lzer)(uint32_t f1)
+{
+env-fregs[f1].l.upper = float32_zero;
+}

Surely this is trivial enough to inline rather than
calling a helper function...


Lots of the FPU code could use inlining. The CC stuff does too. For now, 
I kept things the way Uli wrote them :).



+/* load 128-bit FP zero */
+void HELPER(lzxr)(uint32_t f1)
+{
+CPU_QuadU x;
+

Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-28 Thread Richard Henderson
On 03/28/2011 10:23 AM, Alexander Graf wrote:
 I just checked the macros there and it looks like
 float32_compare_quiet returns eq when both numbers are NaN.

No it doesn't -- a == b will never be true for either operand as NaN.

Have a look at the real softfloat version anyway, not the
softfloat-native version.  This is hiding behind macros; 
look for float ## s ## _compare_internal.



r~



Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-28 Thread Peter Maydell
On 28 March 2011 18:23, Alexander Graf ag...@suse.de wrote:
 On 03/24/2011 06:29 PM, Peter Maydell wrote:
 +/* condition codes for binary FP ops */
 +static uint32_t set_cc_f32(float32 v1, float32 v2)
 +{
 +    if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
 +        return 3;
 +    } else if (float32_eq(v1, v2,env-fpu_status)) {
 +        return 0;
 +    } else if (float32_lt(v1, v2,env-fpu_status)) {
 +        return 1;
 +    } else {
 +        return 2;
 +    }
 +}

 Can you not use float32_compare_quiet() (returns a value
 telling you if it's less/equal/greater/unordered)?
 If not, needs a comment saying why you need to do it the hard way.

 I just checked the macros there and it looks like float32_compare_quiet
 returns eq when both numbers are NaN.

Hmm?

if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) \
 extractFloat ## s ## Frac( a ) ) || \
( ( extractFloat ## s ## Exp( b ) == nan_exp ) \
  extractFloat ## s ## Frac( b ) )) {\
if (!is_quiet || \
float ## s ## _is_signaling_nan( a ) ||  \
float ## s ## _is_signaling_nan( b ) ) { \
float_raise( float_flag_invalid STATUS_VAR); \
}\
return float_relation_unordered; \
}\

If A is a NaN (ie its exponent is nan_exp and the frac bits aren't zero)
or B is a NaN then we return float_relation_unordered.

 We would still have to convert from
 the return value from that over to a CC value. I honestly don't see any
 benefit - the code doesn't become cleaner or smaller.

So you get
static uint32_t set_cc_f32(float32 v1, float32 v2)
{
    switch (float32_compare_quiet(v1, v2, env-fpu_status)) {
case float_relation_unordered:
return 3;
    case float_relation_equal:
        return 0;
    case float_relation_less:
        return 1;
    case float_relation_greater:
        return 2;
case float_relation_unordered:
return 3;
    }
}

(and you probably want to put the conversion switch into a function
since you'll be using it several times.)

Which I think is pretty straightforward, cleaner because we only
call one softfloat function rather than several, and should be
faster too (we get to avoid repeating a pile of tedious bit manipulation
in the eq and lt functions).

 +/* load 128-bit FP zero */
 +void HELPER(lzxr)(uint32_t f1)
 +{
 +    CPU_QuadU x;
 +    x.q = float64_to_float128(float64_zero,env-fpu_status);

 Yuck. Just define a float128_zero if we need one.

 Good point. Mind to do so? I find myself struggling with the code there.

We could just follow the pattern of  float128_default_nan_{low,high}
in softfloat.h:

#define float128_zero_low LIT64(0)
#define float128_zero_high LIT64(0)

then your function has:
 x.q.high = float128_zero_high;
 x.q.low = float128_zero_low;

Or we could do something with an expression that returns a
struct type; that would be cleaner. I think the default nan
code is assuming it might have to be compiled with something
other than gcc. However I forget the C syntax and have to go
home now :-)

-- PMM



Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-24 Thread Richard Henderson
On 03/24/2011 10:29 AM, Peter Maydell wrote:
 On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote:
 
 This is more random comments in passing than a thorough review; sorry.
 
 +#if HOST_LONG_BITS == 64  defined(__GNUC__)
 +/* assuming 64-bit hosts have __uint128_t */
 +__uint128_t dividend = (((__uint128_t)env-regs[r1])  64) |
 +   (env-regs[r1+1]);
 +__uint128_t quotient = dividend / divisor;
 +env-regs[r1+1] = quotient;
 +__uint128_t remainder = dividend % divisor;
 +env-regs[r1] = remainder;
 +#else
 +/* 32-bit hosts would need special wrapper functionality - just 
 abort if
 +   we encounter such a case; it's very unlikely anyways. */
 +cpu_abort(env, 128 - 64/64 division not implemented\n);
 +#endif
 
 ...I'm still using a 32 bit system :-)

A couple of options:

(1) Steal code from gcc's __[u]divdi3 for implementing double-word division via
single-word division.  In this case, your single-word will be long long.

(2) Implement a simple bit reduction loop.  This is probably easiest.

(3) Reuse some of the softfloat code that manipulates 128bit quantities.  This
is probably the best option, particularly if the availability of __uint128
is taught to softfloat so that it doesn't always open-code stuff that the
compiler could take care of.


r~



Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-24 Thread Peter Maydell
On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote:

This is more random comments in passing than a thorough review; sorry.

 +#if HOST_LONG_BITS == 64  defined(__GNUC__)
 +        /* assuming 64-bit hosts have __uint128_t */
 +        __uint128_t dividend = (((__uint128_t)env-regs[r1])  64) |
 +                               (env-regs[r1+1]);
 +        __uint128_t quotient = dividend / divisor;
 +        env-regs[r1+1] = quotient;
 +        __uint128_t remainder = dividend % divisor;
 +        env-regs[r1] = remainder;
 +#else
 +        /* 32-bit hosts would need special wrapper functionality - just 
 abort if
 +           we encounter such a case; it's very unlikely anyways. */
 +        cpu_abort(env, 128 - 64/64 division not implemented\n);
 +#endif

...I'm still using a 32 bit system :-)

 +/* condition codes for binary FP ops */
 +static uint32_t set_cc_f32(float32 v1, float32 v2)
 +{
 +    if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
 +        return 3;
 +    } else if (float32_eq(v1, v2, env-fpu_status)) {
 +        return 0;
 +    } else if (float32_lt(v1, v2, env-fpu_status)) {
 +        return 1;
 +    } else {
 +        return 2;
 +    }
 +}

Can you not use float32_compare_quiet() (returns a value
telling you if it's less/equal/greater/unordered)?
If not, needs a comment saying why you need to do it the hard way.

 +/* negative absolute of 32-bit float */
 +uint32_t HELPER(lcebr)(uint32_t f1, uint32_t f2)
 +{
 +    env-fregs[f1].l.upper = float32_sub(float32_zero, 
 env-fregs[f2].l.upper,
 +                                         env-fpu_status);
 +    return set_cc_nz_f32(env-fregs[f1].l.upper);
 +}

Google suggests this is wrong:
http://publib.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/DZ9AR006/19.4.10?SHELF=DT=19990630131355CASE=
says for lcebr that:
The sign is inverted for any operand, including a QNaN or SNaN,
without causing an arithmetic exception.

but float32_sub will raise exceptions for NaNs. You want
float32_chs() (and similarly for the other types).

 +/* convert 64-bit float to 128-bit float */
 +uint32_t HELPER(lcxbr)(uint32_t f1, uint32_t f2)

Wrong comment? Looks like another invert-sign op from
the online POO.

 +/* 128-bit FP compare RR */
 +uint32_t HELPER(cxbr)(uint32_t f1, uint32_t f2)
 +{
 +    CPU_QuadU v1;
 +    v1.ll.upper = env-fregs[f1].ll;
 +    v1.ll.lower = env-fregs[f1 + 2].ll;
 +    CPU_QuadU v2;
 +    v2.ll.upper = env-fregs[f2].ll;
 +    v2.ll.lower = env-fregs[f2 + 2].ll;
 +    if (float128_is_any_nan(v1.q) || float128_is_any_nan(v2.q)) {
 +        return 3;
 +    } else if (float128_eq(v1.q, v2.q, env-fpu_status)) {
 +        return 0;
 +    } else if (float128_lt(v1.q, v2.q, env-fpu_status)) {
 +        return 1;
 +    } else {
 +        return 2;
 +    }
 +}

float128_compare_quiet() again?

 +/* convert 32-bit float to 64-bit int */
 +uint32_t HELPER(cgebr)(uint32_t r1, uint32_t f2, uint32_t m3)
 +{
 +    float32 v2 = env-fregs[f2].l.upper;
 +    set_round_mode(m3);
 +    env-regs[r1] = float32_to_int64(v2, env-fpu_status);
 +    return set_cc_nz_f32(v2);
 +}

Should this really be permanently setting the rounding mode
for future instructions as well as for the op it does itself?

 +/* load 32-bit FP zero */
 +void HELPER(lzer)(uint32_t f1)
 +{
 +    env-fregs[f1].l.upper = float32_zero;
 +}

Surely this is trivial enough to inline rather than
calling a helper function...

 +/* load 128-bit FP zero */
 +void HELPER(lzxr)(uint32_t f1)
 +{
 +    CPU_QuadU x;
 +    x.q = float64_to_float128(float64_zero, env-fpu_status);

Yuck. Just define a float128_zero if we need one.

 +uint32_t HELPER(tceb)(uint32_t f1, uint64_t m2)
 +{
 +    float32 v1 = env-fregs[f1].l.upper;
 +    int neg = float32_is_neg(v1);
 +    uint32_t cc = 0;
 +
 +    HELPER_LOG(%s: v1 0x%lx m2 0x%lx neg %d\n, __FUNCTION__, (long)v1, m2, 
 neg);
 +    if ((float32_is_zero(v1)  (m2  (1  (11-neg ||
 +        (float32_is_infinity(v1)  (m2  (1  (5-neg ||
 +        (float32_is_any_nan(v1)  (m2  (1  (3-neg ||
 +        (float32_is_signaling_nan(v1)  (m2  (1  (1-neg) {
 +        cc = 1;
 +    } else if (m2  (1  (9-neg))) {
 +        /* assume normalized number */
 +        cc = 1;
 +    }
 +
 +    /* FIXME: denormalized? */
 +    return cc;
 +}

There's a float32_is_zero_or_denormal(); if you need a
float32_is_denormal() which is false for real zero we
could add it, I guess.

 +static inline uint32_t cc_calc_nabs_32(CPUState *env, int32_t dst)
 +{
 +    return !!dst;
 +}

Another candidate for inlining.

-- PMM



Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-24 Thread Alexander Graf

Am 24.03.2011 um 18:41 schrieb Richard Henderson r...@twiddle.net:

 On 03/24/2011 10:29 AM, Peter Maydell wrote:
 On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote:
 
 This is more random comments in passing than a thorough review; sorry.
 
 +#if HOST_LONG_BITS == 64  defined(__GNUC__)
 +/* assuming 64-bit hosts have __uint128_t */
 +__uint128_t dividend = (((__uint128_t)env-regs[r1])  64) |
 +   (env-regs[r1+1]);
 +__uint128_t quotient = dividend / divisor;
 +env-regs[r1+1] = quotient;
 +__uint128_t remainder = dividend % divisor;
 +env-regs[r1] = remainder;
 +#else
 +/* 32-bit hosts would need special wrapper functionality - just 
 abort if
 +   we encounter such a case; it's very unlikely anyways. */
 +cpu_abort(env, 128 - 64/64 division not implemented\n);
 +#endif
 
 ...I'm still using a 32 bit system :-)
 
 A couple of options:
 
 (1) Steal code from gcc's __[u]divdi3 for implementing double-word division 
 via
single-word division.  In this case, your single-word will be long long.
 
 (2) Implement a simple bit reduction loop.  This is probably easiest.
 
 (3) Reuse some of the softfloat code that manipulates 128bit quantities.  This
is probably the best option, particularly if the availability of __uint128
is taught to softfloat so that it doesn't always open-code stuff that the
compiler could take care of.

In all applications I've run so far this abort has _never_ fired. I'm not sure 
gcc even emits it.

So IMHO this abort doesn't hurt. Once we get a bug report of a user hitting it, 
we can think of ways to implement the missing bits. For now, it's not worse 
than a not implemented opcode (of which we still have quite a number).


Alex

 



Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers

2011-03-24 Thread Alexander Graf

Am 24.03.2011 um 18:29 schrieb Peter Maydell peter.mayd...@linaro.org:

 On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote:
 
 This is more random comments in passing than a thorough review; sorry.
 
 +#if HOST_LONG_BITS == 64  defined(__GNUC__)
 +/* assuming 64-bit hosts have __uint128_t */
 +__uint128_t dividend = (((__uint128_t)env-regs[r1])  64) |
 +   (env-regs[r1+1]);
 +__uint128_t quotient = dividend / divisor;
 +env-regs[r1+1] = quotient;
 +__uint128_t remainder = dividend % divisor;
 +env-regs[r1] = remainder;
 +#else
 +/* 32-bit hosts would need special wrapper functionality - just 
 abort if
 +   we encounter such a case; it's very unlikely anyways. */
 +cpu_abort(env, 128 - 64/64 division not implemented\n);
 +#endif
 
 ...I'm still using a 32 bit system :-)
 
 +/* condition codes for binary FP ops */
 +static uint32_t set_cc_f32(float32 v1, float32 v2)
 +{
 +if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) {
 +return 3;
 +} else if (float32_eq(v1, v2, env-fpu_status)) {
 +return 0;
 +} else if (float32_lt(v1, v2, env-fpu_status)) {
 +return 1;
 +} else {
 +return 2;
 +}
 +}
 
 Can you not use float32_compare_quiet() (returns a value
 telling you if it's less/equal/greater/unordered)?
 If not, needs a comment saying why you need to do it the hard way.

Phew - I really have (almost) no clue about fp. Those parts come from Uli. So I 
guess it's the easiest to just ask him :)

 
 +/* negative absolute of 32-bit float */
 +uint32_t HELPER(lcebr)(uint32_t f1, uint32_t f2)
 +{
 +env-fregs[f1].l.upper = float32_sub(float32_zero, 
 env-fregs[f2].l.upper,
 + env-fpu_status);
 +return set_cc_nz_f32(env-fregs[f1].l.upper);
 +}
 
 Google suggests this is wrong:
 http://publib.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/DZ9AR006/19.4.10?SHELF=DT=19990630131355CASE=
 says for lcebr that:
 The sign is inverted for any operand, including a QNaN or SNaN,
 without causing an arithmetic exception.
 
 but float32_sub will raise exceptions for NaNs. You want
 float32_chs() (and similarly for the other types).
 
 +/* convert 64-bit float to 128-bit float */
 +uint32_t HELPER(lcxbr)(uint32_t f1, uint32_t f2)
 
 Wrong comment? Looks like another invert-sign op from
 the online POO.
 
 +/* 128-bit FP compare RR */
 +uint32_t HELPER(cxbr)(uint32_t f1, uint32_t f2)
 +{
 +CPU_QuadU v1;
 +v1.ll.upper = env-fregs[f1].ll;
 +v1.ll.lower = env-fregs[f1 + 2].ll;
 +CPU_QuadU v2;
 +v2.ll.upper = env-fregs[f2].ll;
 +v2.ll.lower = env-fregs[f2 + 2].ll;
 +if (float128_is_any_nan(v1.q) || float128_is_any_nan(v2.q)) {
 +return 3;
 +} else if (float128_eq(v1.q, v2.q, env-fpu_status)) {
 +return 0;
 +} else if (float128_lt(v1.q, v2.q, env-fpu_status)) {
 +return 1;
 +} else {
 +return 2;
 +}
 +}
 
 float128_compare_quiet() again?
 
 +/* convert 32-bit float to 64-bit int */
 +uint32_t HELPER(cgebr)(uint32_t r1, uint32_t f2, uint32_t m3)
 +{
 +float32 v2 = env-fregs[f2].l.upper;
 +set_round_mode(m3);
 +env-regs[r1] = float32_to_int64(v2, env-fpu_status);
 +return set_cc_nz_f32(v2);
 +}
 
 Should this really be permanently setting the rounding mode
 for future instructions as well as for the op it does itself?
 
 +/* load 32-bit FP zero */
 +void HELPER(lzer)(uint32_t f1)
 +{
 +env-fregs[f1].l.upper = float32_zero;
 +}
 
 Surely this is trivial enough to inline rather than
 calling a helper function...
 
 +/* load 128-bit FP zero */
 +void HELPER(lzxr)(uint32_t f1)
 +{
 +CPU_QuadU x;
 +x.q = float64_to_float128(float64_zero, env-fpu_status);
 
 Yuck. Just define a float128_zero if we need one.
 
 +uint32_t HELPER(tceb)(uint32_t f1, uint64_t m2)
 +{
 +float32 v1 = env-fregs[f1].l.upper;
 +int neg = float32_is_neg(v1);
 +uint32_t cc = 0;
 +
 +HELPER_LOG(%s: v1 0x%lx m2 0x%lx neg %d\n, __FUNCTION__, (long)v1, 
 m2, neg);
 +if ((float32_is_zero(v1)  (m2  (1  (11-neg ||
 +(float32_is_infinity(v1)  (m2  (1  (5-neg ||
 +(float32_is_any_nan(v1)  (m2  (1  (3-neg ||
 +(float32_is_signaling_nan(v1)  (m2  (1  (1-neg) {
 +cc = 1;
 +} else if (m2  (1  (9-neg))) {
 +/* assume normalized number */
 +cc = 1;
 +}
 +
 +/* FIXME: denormalized? */
 +return cc;
 +}
 
 There's a float32_is_zero_or_denormal(); if you need a
 float32_is_denormal() which is false for real zero we
 could add it, I guess.
 
 +static inline uint32_t cc_calc_nabs_32(CPUState *env, int32_t dst)
 +{
 +return !!dst;
 +}
 
 Another candidate for inlining.
 
 -- PMM