Hello!

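The attached patch makes crtfastmath.c use __builtin_ia32_fxsave instead
of an inline asm "fxsave". In addition, it clears only fxsave.mxcsr_mask
rather than the whole structure: some implementations of FXSAVE do not
modify reserved areas of the image, and mxcsr_mask is the only such field
we inspect. Also, when FXSAVE is available we no longer have to execute an
additional stmxcsr, since fxsave also saves this register.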

2012-10-26  Uros Bizjak  <ubiz...@gmail.com>

        * config/i386/crtfastmath.c (set_fast_math): Use __builtin_ia32_fxsave.
        Clear only fxsave.mxcsr_mask.  Use saved mxcsr from fxsave structure
        when appropriate.  Correct structure element types.
        * config/i386/t-crtfm (crtfastmath.o): Compile with -mfxsr, remove
        -minline-all-stringops from compile flags.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/crtfastmath.c
===================================================================
--- config/i386/crtfastmath.c   (revision 192841)
+++ config/i386/crtfastmath.c   (working copy)
@@ -91,36 +91,41 @@ set_fast_math (void)
        return;
 #endif /* __sun__ && __svr4__ */
 
-      mxcsr = __builtin_ia32_stmxcsr () | MXCSR_FTZ;
-
       if (edx & bit_FXSAVE)
        {
          /* Check if DAZ is available.  */
          struct
            {
-             unsigned short int cwd;
-             unsigned short int swd;
-             unsigned short int twd;
-             unsigned short int fop;
-             long int fip;
-             long int fcs;
-             long int foo;
-             long int fos;
-             long int mxcsr;
-             long int mxcsr_mask;
-             long int st_space[32];
-             long int xmm_space[32];
-             long int padding[56];
+             unsigned short cwd;
+             unsigned short swd;
+             unsigned short twd;
+             unsigned short fop;
+             unsigned int fip;
+             unsigned int fcs;
+             unsigned int foo;
+             unsigned int fos;
+             unsigned int mxcsr;
+             unsigned int mxcsr_mask;
+             unsigned int st_space[32];
+             unsigned int xmm_space[32];
+             unsigned int padding[56];
            } __attribute__ ((aligned (16))) fxsave;
 
-         __builtin_memset (&fxsave, 0, sizeof (fxsave));
+         /* This is necessary since some implementations of FXSAVE
+            do not modify reserved areas within the image.  */
+         fxsave.mxcsr_mask = 0;
 
-         asm volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave));
+         __builtin_ia32_fxsave (&fxsave);
 
+         mxcsr = fxsave.mxcsr;
+
          if (fxsave.mxcsr_mask & MXCSR_DAZ)
            mxcsr |= MXCSR_DAZ;
        }
+      else
+       mxcsr = __builtin_ia32_stmxcsr ();
 
+      mxcsr |= MXCSR_FTZ;
       __builtin_ia32_ldmxcsr (mxcsr);
     }
 #else
Index: config/i386/t-crtfm
===================================================================
--- config/i386/t-crtfm (revision 192841)
+++ config/i386/t-crtfm (working copy)
@@ -1,4 +1,4 @@
 # This is an endfile, Use -minline-all-stringops to ensure
 # that __builtin_memset doesn't refer to the lib function memset().
 crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c
-       $(gcc_compile) -msse -minline-all-stringops -c $<
+       $(gcc_compile) -mfxsr -msse -c $<

Reply via email to