On Wed, Jun 19, 2013 at 8:27 AM, Tobias Burnus <bur...@net-b.de> wrote:

>> Attached patch initializes return variable in get_fpu_except_flags.
>> Additionally, it uses __asm__ and __volatile__ consistently, as
>> recommended for header files and unifies a bunch of formatting issues
>> throughout the file.
>
>
> OK. Thanks for having a second look and improving the file.

Actually, on a third look, there are multiple other issues in this file:

1. "cw_sse &= 0xffff0000;" is wrong, since it also clears FTZ, RC and DAZ flags.

2. x87 also needs to clear stalled exception flags, otherwise stalled
flag triggers exception, when corresponding exception bit is unmasked.

3. fsfcw should be used instead of fnstcw, so all pending exceptions
are handled (please note that in case when sticky exception flag is
not cleared in the exception handler, point 2 applies).

4. A lot of code could be simplified in set_fpu function.

2013-06-19  Uros Bizjak  <ubiz...@gmail.com>

    * config/fpu-387.h (_FPU_MASK_ALL): New.
    (set_fpu): Use fstcw to store x87 FPU control word. Use fnclex to
    clear stalled exception flags.  Correctly clear stalled SSE
    exception flags.  Simplify code.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu.

I will wait for a day for possible comments.

The patch should be committed to all release branches.

Uros.
Index: config/fpu-387.h
===================================================================
--- config/fpu-387.h    (revision 200211)
+++ config/fpu-387.h    (working copy)
@@ -96,23 +96,26 @@ has_sse (void)
 #define _FPU_MASK_UM  0x10
 #define _FPU_MASK_PM  0x20
 
+#define _FPU_MASK_ALL 0x3f
+
 void set_fpu (void)
 {
+  int excepts = 0;
   unsigned short cw;
 
-  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
+  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
 
-  cw |= (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM
-        | _FPU_MASK_UM | _FPU_MASK_PM);
+  if (options.fpe & GFC_FPE_INVALID) excepts |= _FPU_MASK_IM;
+  if (options.fpe & GFC_FPE_DENORMAL) excepts |= _FPU_MASK_DM;
+  if (options.fpe & GFC_FPE_ZERO) excepts |= _FPU_MASK_ZM;
+  if (options.fpe & GFC_FPE_OVERFLOW) excepts |= _FPU_MASK_OM;
+  if (options.fpe & GFC_FPE_UNDERFLOW) excepts |= _FPU_MASK_UM;
+  if (options.fpe & GFC_FPE_INEXACT) excepts |= _FPU_MASK_PM;
 
-  if (options.fpe & GFC_FPE_INVALID) cw &= ~_FPU_MASK_IM;
-  if (options.fpe & GFC_FPE_DENORMAL) cw &= ~_FPU_MASK_DM;
-  if (options.fpe & GFC_FPE_ZERO) cw &= ~_FPU_MASK_ZM;
-  if (options.fpe & GFC_FPE_OVERFLOW) cw &= ~_FPU_MASK_OM;
-  if (options.fpe & GFC_FPE_UNDERFLOW) cw &= ~_FPU_MASK_UM;
-  if (options.fpe & GFC_FPE_INEXACT) cw &= ~_FPU_MASK_PM;
+  cw |= _FPU_MASK_ALL;
+  cw &= ~excepts;
 
-  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
+  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
 
   if (has_sse())
     {
@@ -120,22 +123,17 @@ void set_fpu (void)
 
       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
 
-      cw_sse &= 0xffff0000;
-      cw_sse |= (_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM
-                | _FPU_MASK_UM | _FPU_MASK_PM ) << 7;
+      /* The SSE exception masks are shifted by 7 bits.  */
+      cw_sse |= _FPU_MASK_ALL << 7;
+      cw_sse &= ~(excepts << 7);
 
-      if (options.fpe & GFC_FPE_INVALID) cw_sse &= ~(_FPU_MASK_IM << 7);
-      if (options.fpe & GFC_FPE_DENORMAL) cw_sse &= ~(_FPU_MASK_DM << 7);
-      if (options.fpe & GFC_FPE_ZERO) cw_sse &= ~(_FPU_MASK_ZM << 7);
-      if (options.fpe & GFC_FPE_OVERFLOW) cw_sse &= ~(_FPU_MASK_OM << 7);
-      if (options.fpe & GFC_FPE_UNDERFLOW) cw_sse &= ~(_FPU_MASK_UM << 7);
-      if (options.fpe & GFC_FPE_INEXACT) cw_sse &= ~(_FPU_MASK_PM << 7);
+      /* Clear stalled exception flags.  */
+      cw_sse &= ~0x3f;
 
       __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
     }
 }
 
-
 int
 get_fpu_except_flags (void)
 {

Reply via email to