Re: [PATCH 2/2] target/i386: fix IEEE x87 floating-point exception raising

2021-05-20 Thread Peter Maydell
On Fri, 15 May 2020 at 22:23, Joseph Myers  wrote:
>
> Most x87 instruction implementations fail to raise the expected IEEE
> floating-point exceptions because they do nothing to convert the
> exception state from the softfloat machinery into the exception flags
> in the x87 status word.  There is special-case handling of division to
> raise the divide-by-zero exception, but that handling is itself buggy:
> it raises the exception in inappropriate cases (inf / 0 and nan / 0,
> which should not raise any exceptions, and 0 / 0, which should raise
> "invalid" instead).

> Signed-off-by: Joseph Myers 
> ---
>  target/i386/fpu_helper.c | 126 +++-
>  tests/tcg/i386/test-i386-fp-exceptions.c | 831 +++
>  2 files changed, 926 insertions(+), 31 deletions(-)
>  create mode 100644 tests/tcg/i386/test-i386-fp-exceptions.c

I've just noticed that the new test program here provokes compiler
warnings when 'make check-tcg' builds it:

make[2]: Entering directory
'/home/petmay01/linaro/qemu-for-merges/build/all-linux-static/tests/tcg/i386-linux-user'
/home/petmay01/linaro/qemu-for-merges/tests/docker/docker.py --engine
auto cc --cc gcc -i qemu/fedora-i386-cross -s
/home/petmay01/linaro/qemu-for-merges --   -Wall -Werror -O0 -g
-fno-strict-aliasing -m32
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c
-o test-i386-fp-exceptions  -static
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:
Assembler messages:
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:426:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fistp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:433:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fistp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:440:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fistp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:447:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fistp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:454:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fistp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:541:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:548:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:555:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:562:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:569:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:576:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-fp-exceptions.c:583:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fisttp'


There's a similar warning also in test-i386.c:

/home/petmay01/linaro/qemu-for-merges/tests/docker/docker.py --engine
auto cc --cc gcc -i qemu/fedora-i386-cross -s
/home/petmay01/linaro/qemu-for-merges --   -Wall -Werror -O0 -g
-fno-strict-aliasing -fno-pie  -static -m32 -o test-i386 \
   /home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386.c
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-code16.S
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386-vm86.S
-lm
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386.c:
Assembler messages:
/home/petmay01/linaro/qemu-for-merges/tests/tcg/i386/test-i386.c:869:
Warning: no instruction mnemonic suffix given and no register
operands; using default for `fist'

They don't make the build fail but it would be nice if we could
make them go away...

thanks
-- PMM



Re: [PATCH 2/2] target/i386: fix IEEE x87 floating-point exception raising

2020-05-19 Thread Joseph Myers
On Tue, 19 May 2020, Richard Henderson wrote:

> > Note that another bug in the x87 emulation is the lack of setting C1 for 
> > most instructions with inexact results based on the direction of rounding 
> > (which will require a new feature to be added to the softfloat code to 
> > record that information so the x87 emulation can use it).
> 
> Wow, I don't believe I ever knew about that detail.

musl libc uses it to get correctly rounded double-precision sqrt with x87 
arithmetic.  (glibc instead temporarily sets the rounding precision to 
achieve the same goal.)

-- 
Joseph S. Myers
jos...@codesourcery.com



Re: [PATCH 2/2] target/i386: fix IEEE x87 floating-point exception raising

2020-05-19 Thread Richard Henderson
On 5/19/20 11:12 AM, Joseph Myers wrote:
> On Tue, 19 May 2020, Richard Henderson wrote:
> 
>> To retain the hard float fast path, we need to leave float_flag_invalid set
>> when the accrued exception bit is set.  To me this suggests keep all of the
>> FPUS_* bits in fp_status and only convert to FPUS_* when we read the fp 
>> status
>> word.
> 
> There is no hard float fast path that I can see for floatx80.  The issue 
> of the fast path might be relevant for fixing SSE exception handling 
> (which has some similar issues to x87), but not for floatx80.

Oops, yes.  Wasn't thinking for a moment.

> Note that another bug in the x87 emulation is the lack of setting C1 for 
> most instructions with inexact results based on the direction of rounding 
> (which will require a new feature to be added to the softfloat code to 
> record that information so the x87 emulation can use it).

Wow, I don't believe I ever knew about that detail.

This looks similar to the indication that PPC gives (and qemu does not
implement) in FPSCR.FR ("The last instruction incremented the fraction during
rounding").

I guess it's not quite the same -- ppc indicates the fraction was incremented,
while x87 indicates that the value rounded up.  Which could be computed from
(fraction incremented) ^ sign, I suppose.


r~



Re: [PATCH 2/2] target/i386: fix IEEE x87 floating-point exception raising

2020-05-19 Thread Joseph Myers
On Tue, 19 May 2020, Richard Henderson wrote:

> To retain the hard float fast path, we need to leave float_flag_invalid set
> when the accrued exception bit is set.  To me this suggests keep all of the
> FPUS_* bits in fp_status and only convert to FPUS_* when we read the fp status
> word.

There is no hard float fast path that I can see for floatx80.  The issue 
of the fast path might be relevant for fixing SSE exception handling 
(which has some similar issues to x87), but not for floatx80.

Note that another bug in the x87 emulation is the lack of setting C1 for 
most instructions with inexact results based on the direction of rounding 
(which will require a new feature to be added to the softfloat code to 
record that information so the x87 emulation can use it).

> When it comes to raising unmasked exceptions... I have a couple of thoughts.

I expect some code will be needed in each individual instruction 
implementation, and probably extra softfloat code, to handle unmasked 
exceptions.  Some exceptions, when unmasked, should result in instructions 
not popping inputs from the stack and not updating destinations.  The 
softfloat case needs to provide information about the exact underflow case 
that targets can use when that exception is set to trap.  x87 overflow and 
underflow, when unmasked and with a register destination, are supposed to 
compute and store a result with a biased exponent for use by the trap 
handler.  The code will also need to know exactly which instructions 
should result in a trap handler being called rather than only doing it for 
fwait.  Stack underflow and overflow need to be checked for, regardless of 
exception masking.  (There are other issues relating to trapped exception 
handling as well, but that's a summary of the main ones I've noticed.)

-- 
Joseph S. Myers
jos...@codesourcery.com



Re: [PATCH 2/2] target/i386: fix IEEE x87 floating-point exception raising

2020-05-19 Thread Richard Henderson
On 5/15/20 2:21 PM, Joseph Myers wrote:
> +uint8_t new_flags = get_float_exception_flags(>fp_status);
> +float_raise(old_flags, >fp_status);
> +fpu_set_exception(env,
> +  ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
> +   (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
> +   (new_flags & float_flag_overflow ? FPUS_OE : 0) |
> +   (new_flags & float_flag_underflow ? FPUS_UE : 0) |
> +   (new_flags & float_flag_inexact ? FPUS_PE : 0) |
> +   (new_flags & float_flag_input_denormal ? FPUS_DE : 
> 0)));
> +}

This is not ideal from the point of view of interacting with softfloat's
deferral to host hard float.

I know you're working toward raising unmasked exceptions, but I think we will
want to handle that in a different way.

To retain the hard float fast path, we need to leave float_flag_invalid set
when the accrued exception bit is set.  To me this suggests keep all of the
FPUS_* bits in fp_status and only convert to FPUS_* when we read the fp status
word.

When it comes to raising unmasked exceptions... I have a couple of thoughts.

(1) Enhance softfloat to record the exceptions raised for the previous
operation, separate from the accrued exceptions.

This gets into trouble vs float_flag_invalid though, because we can't compute
that through the hard-float fast path.  And without knowing that
float_flag_invalid is currently irrelevant, we can't use the fast path at all.

This might still help in some places (though not here), so it might still be
worth exploring.

(2) In save_exception_flags, only zero the bits for which we have unmasked
exceptions.  In the normal case this would be none of them, which solves the
fast-path problem above.

To simplify this at runtime, I would suggest pre-computing a softfloat mask of
unmasked exceptions when we change the fp control word.


r~



[PATCH 2/2] target/i386: fix IEEE x87 floating-point exception raising

2020-05-15 Thread Joseph Myers
Most x87 instruction implementations fail to raise the expected IEEE
floating-point exceptions because they do nothing to convert the
exception state from the softfloat machinery into the exception flags
in the x87 status word.  There is special-case handling of division to
raise the divide-by-zero exception, but that handling is itself buggy:
it raises the exception in inappropriate cases (inf / 0 and nan / 0,
which should not raise any exceptions, and 0 / 0, which should raise
"invalid" instead).

Fix this by converting the floating-point exceptions raised during an
operation by the softfloat machinery into exceptions in the x87 status
word (passing through the existing fpu_set_exception function for
handling related to trapping exceptions).  There are special cases
where some functions convert to integer internally but exceptions from
that conversion are not always correct exceptions for the instruction
to raise.

There might be scope for some simplification if the softfloat
exception state either could always be assumed to be in sync with the
state in the status word, or could always be ignored at the start of
each instruction and just set to 0 then; I haven't looked into that in
detail, and it might run into interactions with the various ways the
emulation does not yet handle trapping exceptions properly.  I think
the approach taken here, of saving the softfloat state, setting
exceptions there to 0 and then merging the old exceptions back in
after carrying out the operation, is conservatively safe.

Signed-off-by: Joseph Myers 
---
 target/i386/fpu_helper.c | 126 +++-
 tests/tcg/i386/test-i386-fp-exceptions.c | 831 +++
 2 files changed, 926 insertions(+), 31 deletions(-)
 create mode 100644 tests/tcg/i386/test-i386-fp-exceptions.c

diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c
index 8dcc9ddf68..c19cad466e 100644
--- a/target/i386/fpu_helper.c
+++ b/target/i386/fpu_helper.c
@@ -161,12 +161,32 @@ static void fpu_set_exception(CPUX86State *env, int mask)
 }
 }
 
+static inline uint8_t save_exception_flags(CPUX86State *env)
+{
+uint8_t old_flags = get_float_exception_flags(>fp_status);
+set_float_exception_flags(0, >fp_status);
+return old_flags;
+}
+
+static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
+{
+uint8_t new_flags = get_float_exception_flags(>fp_status);
+float_raise(old_flags, >fp_status);
+fpu_set_exception(env,
+  ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
+   (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
+   (new_flags & float_flag_overflow ? FPUS_OE : 0) |
+   (new_flags & float_flag_underflow ? FPUS_UE : 0) |
+   (new_flags & float_flag_inexact ? FPUS_PE : 0) |
+   (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
+}
+
 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 {
-if (floatx80_is_zero(b)) {
-fpu_set_exception(env, FPUS_ZE);
-}
-return floatx80_div(a, b, >fp_status);
+uint8_t old_flags = save_exception_flags(env);
+floatx80 ret = floatx80_div(a, b, >fp_status);
+merge_exception_flags(env, old_flags);
+return ret;
 }
 
 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
@@ -183,6 +203,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t 
retaddr)
 
 void helper_flds_FT0(CPUX86State *env, uint32_t val)
 {
+uint8_t old_flags = save_exception_flags(env);
 union {
 float32 f;
 uint32_t i;
@@ -190,10 +211,12 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val)
 
 u.i = val;
 FT0 = float32_to_floatx80(u.f, >fp_status);
+merge_exception_flags(env, old_flags);
 }
 
 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 {
+uint8_t old_flags = save_exception_flags(env);
 union {
 float64 f;
 uint64_t i;
@@ -201,6 +224,7 @@ void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 
 u.i = val;
 FT0 = float64_to_floatx80(u.f, >fp_status);
+merge_exception_flags(env, old_flags);
 }
 
 void helper_fildl_FT0(CPUX86State *env, int32_t val)
@@ -210,6 +234,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val)
 
 void helper_flds_ST0(CPUX86State *env, uint32_t val)
 {
+uint8_t old_flags = save_exception_flags(env);
 int new_fpstt;
 union {
 float32 f;
@@ -221,10 +246,12 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val)
 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, >fp_status);
 env->fpstt = new_fpstt;
 env->fptags[new_fpstt] = 0; /* validate stack entry */
+merge_exception_flags(env, old_flags);
 }
 
 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 {
+uint8_t old_flags = save_exception_flags(env);
 int new_fpstt;
 union {
 float64 f;
@@ -236,6 +263,7 @@ void helper_fldl_ST0(CPUX86State *env, uint64_t val)