Hello! The attached patch makes crtfastmath.c use __builtin_ia32_fxsave instead of inline asm. In addition, it clears only fxsave.mxcsr_mask rather than the whole save area, since that is the only field we inspect afterwards. Also, we no longer have to execute an additional stmxcsr when FXSAVE is available, since fxsave also saves the MXCSR register.
2012-10-26  Uros Bizjak  <ubiz...@gmail.com>

	* config/i386/crtfastmath.c (set_fast_math): Use __builtin_ia32_fxsave.
	Clear only fxsave.mxcsr_mask.  Use saved mxcsr from fxsave structure
	when appropriate.  Correct structure element types.
	* config/i386/t-crtfm (crtfastmath.o): Compile with -mfxsr, remove
	-minline-all-stringops from compile flags.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/crtfastmath.c =================================================================== --- config/i386/crtfastmath.c (revision 192841) +++ config/i386/crtfastmath.c (working copy) @@ -91,36 +91,41 @@ set_fast_math (void) return; #endif /* __sun__ && __svr4__ */ - mxcsr = __builtin_ia32_stmxcsr () | MXCSR_FTZ; - if (edx & bit_FXSAVE) { /* Check if DAZ is available. */ struct { - unsigned short int cwd; - unsigned short int swd; - unsigned short int twd; - unsigned short int fop; - long int fip; - long int fcs; - long int foo; - long int fos; - long int mxcsr; - long int mxcsr_mask; - long int st_space[32]; - long int xmm_space[32]; - long int padding[56]; + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + unsigned int fip; + unsigned int fcs; + unsigned int foo; + unsigned int fos; + unsigned int mxcsr; + unsigned int mxcsr_mask; + unsigned int st_space[32]; + unsigned int xmm_space[32]; + unsigned int padding[56]; } __attribute__ ((aligned (16))) fxsave; - __builtin_memset (&fxsave, 0, sizeof (fxsave)); + /* This is necessary since some implementations of FXSAVE + do not modify reserved areas within the image. */ + fxsave.mxcsr_mask = 0; - asm volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave)); + __builtin_ia32_fxsave (&fxsave); + mxcsr = fxsave.mxcsr; + if (fxsave.mxcsr_mask & MXCSR_DAZ) mxcsr |= MXCSR_DAZ; } + else + mxcsr = __builtin_ia32_stmxcsr (); + mxcsr |= MXCSR_FTZ; __builtin_ia32_ldmxcsr (mxcsr); } #else Index: config/i386/t-crtfm =================================================================== --- config/i386/t-crtfm (revision 192841) +++ config/i386/t-crtfm (working copy) @@ -1,4 +1,4 @@ # This is an endfile, Use -minline-all-stringops to ensure # that __builtin_memset doesn't refer to the lib function memset(). crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c - $(gcc_compile) -msse -minline-all-stringops -c $< + $(gcc_compile) -mfxsr -msse -c $<