The following patch substantially improves the generated code for get_fpu_trap_exceptions, reducing the instruction count from 31 to 9.
2015-08-04  Uros Bizjak  <ubiz...@gmail.com>

	* config/fpu-387.h (get_fpu_trap_exceptions): Add temporary variable
	to improve generated code.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/fpu-387.h
===================================================================
--- config/fpu-387.h	(revision 226547)
+++ config/fpu-387.h	(working copy)
@@ -215,12 +215,13 @@ set_fpu (void)
 int
 get_fpu_trap_exceptions (void)
 {
+  unsigned short cw;
+  int mask;
   int res = 0;
-  unsigned short cw;

   __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
-  cw &= _FPU_MASK_ALL;
-
+  mask = cw;
+
   if (has_sse())
     {
       unsigned int cw_sse;
@@ -228,16 +229,18 @@ get_fpu_trap_exceptions (void)
       __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

       /* The SSE exception masks are shifted by 7 bits. */
-      cw = cw | ((cw_sse >> 7) & _FPU_MASK_ALL);
+      mask |= (cw_sse >> 7);
     }

-  if (~cw & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
-  if (~cw & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
-  if (~cw & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
-  if (~cw & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
-  if (~cw & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
-  if (~cw & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
+  mask = ~mask & _FPU_MASK_ALL;
+  if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
+  if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
+  if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
+  if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
+  if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
+  if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
+
   return res;
 }