Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
On 28.03.2011, at 19:55, Peter Maydell wrote: On 28 March 2011 18:23, Alexander Graf ag...@suse.de wrote: On 03/24/2011 06:29 PM, Peter Maydell wrote: +/* condition codes for binary FP ops */ +static uint32_t set_cc_f32(float32 v1, float32 v2) +{ +if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { +return 3; +} else if (float32_eq(v1, v2,env-fpu_status)) { +return 0; +} else if (float32_lt(v1, v2,env-fpu_status)) { +return 1; +} else { +return 2; +} +} Can you not use float32_compare_quiet() (returns a value telling you if it's less/equal/greater/unordered)? If not, needs a comment saying why you need to do it the hard way. I just checked the macros there and it looks like float32_compare_quiet returns eq when both numbers are NaN. Hmm? if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) \ extractFloat ## s ## Frac( a ) ) || \ ( ( extractFloat ## s ## Exp( b ) == nan_exp ) \ extractFloat ## s ## Frac( b ) )) {\ if (!is_quiet || \ float ## s ## _is_signaling_nan( a ) || \ float ## s ## _is_signaling_nan( b ) ) { \ float_raise( float_flag_invalid STATUS_VAR); \ }\ return float_relation_unordered; \ }\ If A is a NaN (ie its exponent is nan_exp and the frac bits aren't zero) or B is a NaN then we return float_relation_unordered. We would still have to convert from the return value from that over to a CC value. I honestly don't see any benefit - the code doesn't become cleaner or smaller. So you get static uint32_t set_cc_f32(float32 v1, float32 v2) { switch (float32_compare_quiet(v1, v2, env-fpu_status)) { case float_relation_unordered: return 3; case float_relation_equal: return 0; case float_relation_less: return 1; case float_relation_greater: return 2; case float_relation_unordered: return 3; } } (and you probably want to put the conversion switch into a function since you'll be using it several times.) Which I think is pretty straightforward, cleaner because we only call one softfloat function rather than several, and should be faster too (we get to avoid repeating a pile of tedious bit manipulation in the eq and lt functions). Alrighty, changed the code :). +/* load 128-bit FP zero */ +void HELPER(lzxr)(uint32_t f1) +{ +CPU_QuadU x; +x.q = float64_to_float128(float64_zero,env-fpu_status); Yuck. Just define a float128_zero if we need one. Good point. Mind to do so? I find myself struggling with the code there. We could just follow the pattern of float128_default_nan_{low,high} in softfloat.h: #define float128_zero_low LIT64(0) #define float128_zero_high LIT64(0) then your function has: x.q.high = float128_zero_high; x.q.low = float128_zero_low; Or we could do something with an expression that returns a struct type; that would be cleaner. I think the default nan code is assuming it might have to be compiled with something other than gcc. However I forget the C syntax and have to go home now :-) I'll just leave it to you for a follow-up patch :). The less I have to touch in softfpu code, the better :) Alex
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
On 03/24/2011 06:29 PM, Peter Maydell wrote: On 24 March 2011 15:58, Alexander Grafag...@suse.de wrote: This is more random comments in passing than a thorough review; sorry. +#if HOST_LONG_BITS == 64 defined(__GNUC__) +/* assuming 64-bit hosts have __uint128_t */ +__uint128_t dividend = (((__uint128_t)env-regs[r1]) 64) | + (env-regs[r1+1]); +__uint128_t quotient = dividend / divisor; +env-regs[r1+1] = quotient; +__uint128_t remainder = dividend % divisor; +env-regs[r1] = remainder; +#else +/* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ +cpu_abort(env, 128 - 64/64 division not implemented\n); +#endif ...I'm still using a 32 bit system :-) +/* condition codes for binary FP ops */ +static uint32_t set_cc_f32(float32 v1, float32 v2) +{ +if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { +return 3; +} else if (float32_eq(v1, v2,env-fpu_status)) { +return 0; +} else if (float32_lt(v1, v2,env-fpu_status)) { +return 1; +} else { +return 2; +} +} Can you not use float32_compare_quiet() (returns a value telling you if it's less/equal/greater/unordered)? If not, needs a comment saying why you need to do it the hard way. I just checked the macros there and it looks like float32_compare_quiet returns eq when both numbers are NaN. We would still have to convert from the return value from that over to a CC value. I honestly don't see any benefit - the code doesn't become cleaner or smaller. int float32_compare_quiet( float32 a, float32 b STATUS_PARAM ) { if (isless(a, b)) { return float_relation_less; } else if (a == b) { return float_relation_equal; } else if (isgreater(a, b)) { return float_relation_greater; } else { return float_relation_unordered; } } so static uint32_t set_cc_f32(float32 v1, float32 v2) { if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { return 3; } else if (float32_eq(v1, v2, env-fpu_status)) { return 0; } else if (float32_lt(v1, v2, env-fpu_status)) { return 1; } else { return 2; } } would become static uint32_t set_cc_f32(float32 v1, float32 v2) { int r; if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { return 3; } r = float32_compare_quiet(v1, v2, env-fpu_status); switch (r) { case float_relation_equal: return 0; case float_relation_less: return 1; default: return 2; } } +/* negative absolute of 32-bit float */ +uint32_t HELPER(lcebr)(uint32_t f1, uint32_t f2) +{ +env-fregs[f1].l.upper = float32_sub(float32_zero, env-fregs[f2].l.upper, +env-fpu_status); +return set_cc_nz_f32(env-fregs[f1].l.upper); +} Google suggests this is wrong: http://publib.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/DZ9AR006/19.4.10?SHELF=DT=19990630131355CASE= says for lcebr that: The sign is inverted for any operand, including a QNaN or SNaN, without causing an arithmetic exception. but float32_sub will raise exceptions for NaNs. You want float32_chs() (and similarly for the other types). Ah, nice :). Thanks! +/* convert 64-bit float to 128-bit float */ +uint32_t HELPER(lcxbr)(uint32_t f1, uint32_t f2) Wrong comment? Looks like another invert-sign op from the online POO. Yup, wrong comment and the same as above for chs :). +/* 128-bit FP compare RR */ +uint32_t HELPER(cxbr)(uint32_t f1, uint32_t f2) +{ +CPU_QuadU v1; +v1.ll.upper = env-fregs[f1].ll; +v1.ll.lower = env-fregs[f1 + 2].ll; +CPU_QuadU v2; +v2.ll.upper = env-fregs[f2].ll; +v2.ll.lower = env-fregs[f2 + 2].ll; +if (float128_is_any_nan(v1.q) || float128_is_any_nan(v2.q)) { +return 3; +} else if (float128_eq(v1.q, v2.q,env-fpu_status)) { +return 0; +} else if (float128_lt(v1.q, v2.q,env-fpu_status)) { +return 1; +} else { +return 2; +} +} float128_compare_quiet() again? See above :) +/* convert 32-bit float to 64-bit int */ +uint32_t HELPER(cgebr)(uint32_t r1, uint32_t f2, uint32_t m3) +{ +float32 v2 = env-fregs[f2].l.upper; +set_round_mode(m3); +env-regs[r1] = float32_to_int64(v2,env-fpu_status); +return set_cc_nz_f32(v2); +} Should this really be permanently setting the rounding mode for future instructions as well as for the op it does itself? IIUC every op that does rounding sets the rounding mode, no? +/* load 32-bit FP zero */ +void HELPER(lzer)(uint32_t f1) +{ +env-fregs[f1].l.upper = float32_zero; +} Surely this is trivial enough to inline rather than calling a helper function... Lots of the FPU code could use inlining. The CC stuff does too. For now, I kept things the way Uli wrote them :). +/* load 128-bit FP zero */ +void HELPER(lzxr)(uint32_t f1) +{ +CPU_QuadU x; +
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
On 03/28/2011 10:23 AM, Alexander Graf wrote: I just checked the macros there and it looks like float32_compare_quiet returns eq when both numbers are NaN. No it doesn't -- a == b will never be true for either operand as NaN. Have a look at the real softfloat version anyway, not the softfloat-native version. This is hiding behind macros; look for float ## s ## _compare_internal. r~
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
On 28 March 2011 18:23, Alexander Graf ag...@suse.de wrote: On 03/24/2011 06:29 PM, Peter Maydell wrote: +/* condition codes for binary FP ops */ +static uint32_t set_cc_f32(float32 v1, float32 v2) +{ + if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { + return 3; + } else if (float32_eq(v1, v2,env-fpu_status)) { + return 0; + } else if (float32_lt(v1, v2,env-fpu_status)) { + return 1; + } else { + return 2; + } +} Can you not use float32_compare_quiet() (returns a value telling you if it's less/equal/greater/unordered)? If not, needs a comment saying why you need to do it the hard way. I just checked the macros there and it looks like float32_compare_quiet returns eq when both numbers are NaN. Hmm? if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) \ extractFloat ## s ## Frac( a ) ) || \ ( ( extractFloat ## s ## Exp( b ) == nan_exp ) \ extractFloat ## s ## Frac( b ) )) {\ if (!is_quiet || \ float ## s ## _is_signaling_nan( a ) || \ float ## s ## _is_signaling_nan( b ) ) { \ float_raise( float_flag_invalid STATUS_VAR); \ }\ return float_relation_unordered; \ }\ If A is a NaN (ie its exponent is nan_exp and the frac bits aren't zero) or B is a NaN then we return float_relation_unordered. We would still have to convert from the return value from that over to a CC value. I honestly don't see any benefit - the code doesn't become cleaner or smaller. So you get static uint32_t set_cc_f32(float32 v1, float32 v2) { switch (float32_compare_quiet(v1, v2, env-fpu_status)) { case float_relation_unordered: return 3; case float_relation_equal: return 0; case float_relation_less: return 1; case float_relation_greater: return 2; case float_relation_unordered: return 3; } } (and you probably want to put the conversion switch into a function since you'll be using it several times.) Which I think is pretty straightforward, cleaner because we only call one softfloat function rather than several, and should be faster too (we get to avoid repeating a pile of tedious bit manipulation in the eq and lt functions). +/* load 128-bit FP zero */ +void HELPER(lzxr)(uint32_t f1) +{ + CPU_QuadU x; + x.q = float64_to_float128(float64_zero,env-fpu_status); Yuck. Just define a float128_zero if we need one. Good point. Mind to do so? I find myself struggling with the code there. We could just follow the pattern of float128_default_nan_{low,high} in softfloat.h: #define float128_zero_low LIT64(0) #define float128_zero_high LIT64(0) then your function has: x.q.high = float128_zero_high; x.q.low = float128_zero_low; Or we could do something with an expression that returns a struct type; that would be cleaner. I think the default nan code is assuming it might have to be compiled with something other than gcc. However I forget the C syntax and have to go home now :-) -- PMM
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
On 03/24/2011 10:29 AM, Peter Maydell wrote: On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote: This is more random comments in passing than a thorough review; sorry. +#if HOST_LONG_BITS == 64 defined(__GNUC__) +/* assuming 64-bit hosts have __uint128_t */ +__uint128_t dividend = (((__uint128_t)env-regs[r1]) 64) | + (env-regs[r1+1]); +__uint128_t quotient = dividend / divisor; +env-regs[r1+1] = quotient; +__uint128_t remainder = dividend % divisor; +env-regs[r1] = remainder; +#else +/* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ +cpu_abort(env, 128 - 64/64 division not implemented\n); +#endif ...I'm still using a 32 bit system :-) A couple of options: (1) Steal code from gcc's __[u]divdi3 for implementing double-word division via single-word division. In this case, your single-word will be long long. (2) Implement a simple bit reduction loop. This is probably easiest. (3) Reuse some of the softfloat code that manipulates 128bit quantities. This is probably the best option, particularly if the availability of __uint128 is taught to softfloat so that it doesn't always open-code stuff that the compiler could take care of. r~
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote: This is more random comments in passing than a thorough review; sorry. +#if HOST_LONG_BITS == 64 defined(__GNUC__) + /* assuming 64-bit hosts have __uint128_t */ + __uint128_t dividend = (((__uint128_t)env-regs[r1]) 64) | + (env-regs[r1+1]); + __uint128_t quotient = dividend / divisor; + env-regs[r1+1] = quotient; + __uint128_t remainder = dividend % divisor; + env-regs[r1] = remainder; +#else + /* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ + cpu_abort(env, 128 - 64/64 division not implemented\n); +#endif ...I'm still using a 32 bit system :-) +/* condition codes for binary FP ops */ +static uint32_t set_cc_f32(float32 v1, float32 v2) +{ + if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { + return 3; + } else if (float32_eq(v1, v2, env-fpu_status)) { + return 0; + } else if (float32_lt(v1, v2, env-fpu_status)) { + return 1; + } else { + return 2; + } +} Can you not use float32_compare_quiet() (returns a value telling you if it's less/equal/greater/unordered)? If not, needs a comment saying why you need to do it the hard way. +/* negative absolute of 32-bit float */ +uint32_t HELPER(lcebr)(uint32_t f1, uint32_t f2) +{ + env-fregs[f1].l.upper = float32_sub(float32_zero, env-fregs[f2].l.upper, + env-fpu_status); + return set_cc_nz_f32(env-fregs[f1].l.upper); +} Google suggests this is wrong: http://publib.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/DZ9AR006/19.4.10?SHELF=DT=19990630131355CASE= says for lcebr that: The sign is inverted for any operand, including a QNaN or SNaN, without causing an arithmetic exception. but float32_sub will raise exceptions for NaNs. You want float32_chs() (and similarly for the other types). +/* convert 64-bit float to 128-bit float */ +uint32_t HELPER(lcxbr)(uint32_t f1, uint32_t f2) Wrong comment? Looks like another invert-sign op from the online POO. +/* 128-bit FP compare RR */ +uint32_t HELPER(cxbr)(uint32_t f1, uint32_t f2) +{ + CPU_QuadU v1; + v1.ll.upper = env-fregs[f1].ll; + v1.ll.lower = env-fregs[f1 + 2].ll; + CPU_QuadU v2; + v2.ll.upper = env-fregs[f2].ll; + v2.ll.lower = env-fregs[f2 + 2].ll; + if (float128_is_any_nan(v1.q) || float128_is_any_nan(v2.q)) { + return 3; + } else if (float128_eq(v1.q, v2.q, env-fpu_status)) { + return 0; + } else if (float128_lt(v1.q, v2.q, env-fpu_status)) { + return 1; + } else { + return 2; + } +} float128_compare_quiet() again? +/* convert 32-bit float to 64-bit int */ +uint32_t HELPER(cgebr)(uint32_t r1, uint32_t f2, uint32_t m3) +{ + float32 v2 = env-fregs[f2].l.upper; + set_round_mode(m3); + env-regs[r1] = float32_to_int64(v2, env-fpu_status); + return set_cc_nz_f32(v2); +} Should this really be permanently setting the rounding mode for future instructions as well as for the op it does itself? +/* load 32-bit FP zero */ +void HELPER(lzer)(uint32_t f1) +{ + env-fregs[f1].l.upper = float32_zero; +} Surely this is trivial enough to inline rather than calling a helper function... +/* load 128-bit FP zero */ +void HELPER(lzxr)(uint32_t f1) +{ + CPU_QuadU x; + x.q = float64_to_float128(float64_zero, env-fpu_status); Yuck. Just define a float128_zero if we need one. +uint32_t HELPER(tceb)(uint32_t f1, uint64_t m2) +{ + float32 v1 = env-fregs[f1].l.upper; + int neg = float32_is_neg(v1); + uint32_t cc = 0; + + HELPER_LOG(%s: v1 0x%lx m2 0x%lx neg %d\n, __FUNCTION__, (long)v1, m2, neg); + if ((float32_is_zero(v1) (m2 (1 (11-neg || + (float32_is_infinity(v1) (m2 (1 (5-neg || + (float32_is_any_nan(v1) (m2 (1 (3-neg || + (float32_is_signaling_nan(v1) (m2 (1 (1-neg) { + cc = 1; + } else if (m2 (1 (9-neg))) { + /* assume normalized number */ + cc = 1; + } + + /* FIXME: denormalized? */ + return cc; +} There's a float32_is_zero_or_denormal(); if you need a float32_is_denormal() which is false for real zero we could add it, I guess. +static inline uint32_t cc_calc_nabs_32(CPUState *env, int32_t dst) +{ + return !!dst; +} Another candidate for inlining. -- PMM
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
Am 24.03.2011 um 18:41 schrieb Richard Henderson r...@twiddle.net: On 03/24/2011 10:29 AM, Peter Maydell wrote: On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote: This is more random comments in passing than a thorough review; sorry. +#if HOST_LONG_BITS == 64 defined(__GNUC__) +/* assuming 64-bit hosts have __uint128_t */ +__uint128_t dividend = (((__uint128_t)env-regs[r1]) 64) | + (env-regs[r1+1]); +__uint128_t quotient = dividend / divisor; +env-regs[r1+1] = quotient; +__uint128_t remainder = dividend % divisor; +env-regs[r1] = remainder; +#else +/* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ +cpu_abort(env, 128 - 64/64 division not implemented\n); +#endif ...I'm still using a 32 bit system :-) A couple of options: (1) Steal code from gcc's __[u]divdi3 for implementing double-word division via single-word division. In this case, your single-word will be long long. (2) Implement a simple bit reduction loop. This is probably easiest. (3) Reuse some of the softfloat code that manipulates 128bit quantities. This is probably the best option, particularly if the availability of __uint128 is taught to softfloat so that it doesn't always open-code stuff that the compiler could take care of. In all applications I've run so far this abort has _never_ fired. I'm not sure gcc even emits it. So IMHO this abort doesn't hurt. Once we get a bug report of a user hitting it, we can think of ways to implement the missing bits. For now, it's not worse than a not implemented opcode (of which we still have quite a number). Alex
Re: [Qemu-devel] [PATCH 14/17] s390x: Implement opcode helpers
Am 24.03.2011 um 18:29 schrieb Peter Maydell peter.mayd...@linaro.org: On 24 March 2011 15:58, Alexander Graf ag...@suse.de wrote: This is more random comments in passing than a thorough review; sorry. +#if HOST_LONG_BITS == 64 defined(__GNUC__) +/* assuming 64-bit hosts have __uint128_t */ +__uint128_t dividend = (((__uint128_t)env-regs[r1]) 64) | + (env-regs[r1+1]); +__uint128_t quotient = dividend / divisor; +env-regs[r1+1] = quotient; +__uint128_t remainder = dividend % divisor; +env-regs[r1] = remainder; +#else +/* 32-bit hosts would need special wrapper functionality - just abort if + we encounter such a case; it's very unlikely anyways. */ +cpu_abort(env, 128 - 64/64 division not implemented\n); +#endif ...I'm still using a 32 bit system :-) +/* condition codes for binary FP ops */ +static uint32_t set_cc_f32(float32 v1, float32 v2) +{ +if (float32_is_any_nan(v1) || float32_is_any_nan(v2)) { +return 3; +} else if (float32_eq(v1, v2, env-fpu_status)) { +return 0; +} else if (float32_lt(v1, v2, env-fpu_status)) { +return 1; +} else { +return 2; +} +} Can you not use float32_compare_quiet() (returns a value telling you if it's less/equal/greater/unordered)? If not, needs a comment saying why you need to do it the hard way. Phew - I really have (almost) no clue about fp. Those parts come from Uli. So I guess it's the easiest to just ask him :) +/* negative absolute of 32-bit float */ +uint32_t HELPER(lcebr)(uint32_t f1, uint32_t f2) +{ +env-fregs[f1].l.upper = float32_sub(float32_zero, env-fregs[f2].l.upper, + env-fpu_status); +return set_cc_nz_f32(env-fregs[f1].l.upper); +} Google suggests this is wrong: http://publib.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/DZ9AR006/19.4.10?SHELF=DT=19990630131355CASE= says for lcebr that: The sign is inverted for any operand, including a QNaN or SNaN, without causing an arithmetic exception. but float32_sub will raise exceptions for NaNs. You want float32_chs() (and similarly for the other types). +/* convert 64-bit float to 128-bit float */ +uint32_t HELPER(lcxbr)(uint32_t f1, uint32_t f2) Wrong comment? Looks like another invert-sign op from the online POO. +/* 128-bit FP compare RR */ +uint32_t HELPER(cxbr)(uint32_t f1, uint32_t f2) +{ +CPU_QuadU v1; +v1.ll.upper = env-fregs[f1].ll; +v1.ll.lower = env-fregs[f1 + 2].ll; +CPU_QuadU v2; +v2.ll.upper = env-fregs[f2].ll; +v2.ll.lower = env-fregs[f2 + 2].ll; +if (float128_is_any_nan(v1.q) || float128_is_any_nan(v2.q)) { +return 3; +} else if (float128_eq(v1.q, v2.q, env-fpu_status)) { +return 0; +} else if (float128_lt(v1.q, v2.q, env-fpu_status)) { +return 1; +} else { +return 2; +} +} float128_compare_quiet() again? +/* convert 32-bit float to 64-bit int */ +uint32_t HELPER(cgebr)(uint32_t r1, uint32_t f2, uint32_t m3) +{ +float32 v2 = env-fregs[f2].l.upper; +set_round_mode(m3); +env-regs[r1] = float32_to_int64(v2, env-fpu_status); +return set_cc_nz_f32(v2); +} Should this really be permanently setting the rounding mode for future instructions as well as for the op it does itself? +/* load 32-bit FP zero */ +void HELPER(lzer)(uint32_t f1) +{ +env-fregs[f1].l.upper = float32_zero; +} Surely this is trivial enough to inline rather than calling a helper function... +/* load 128-bit FP zero */ +void HELPER(lzxr)(uint32_t f1) +{ +CPU_QuadU x; +x.q = float64_to_float128(float64_zero, env-fpu_status); Yuck. Just define a float128_zero if we need one. +uint32_t HELPER(tceb)(uint32_t f1, uint64_t m2) +{ +float32 v1 = env-fregs[f1].l.upper; +int neg = float32_is_neg(v1); +uint32_t cc = 0; + +HELPER_LOG(%s: v1 0x%lx m2 0x%lx neg %d\n, __FUNCTION__, (long)v1, m2, neg); +if ((float32_is_zero(v1) (m2 (1 (11-neg || +(float32_is_infinity(v1) (m2 (1 (5-neg || +(float32_is_any_nan(v1) (m2 (1 (3-neg || +(float32_is_signaling_nan(v1) (m2 (1 (1-neg) { +cc = 1; +} else if (m2 (1 (9-neg))) { +/* assume normalized number */ +cc = 1; +} + +/* FIXME: denormalized? */ +return cc; +} There's a float32_is_zero_or_denormal(); if you need a float32_is_denormal() which is false for real zero we could add it, I guess. +static inline uint32_t cc_calc_nabs_32(CPUState *env, int32_t dst) +{ +return !!dst; +} Another candidate for inlining. -- PMM