On Tue, Jul 17, 2012 at 08:09:15PM +0300, Konstantin Belousov wrote:
> On Wed, Jul 18, 2012 at 02:03:58AM +1000, Bruce Evans wrote:
> > On Wed, 18 Jul 2012, Bruce Evans wrote:
> > 
> > >On Wed, 18 Jul 2012, Bruce Evans wrote:
> > >>..
> > >>So I still want a single kernel exception handle that merges the statuses.
> > >
> > >Merge the independent statuses modified by their independent controls:
> > >
> > >   return (fpetable[(fpsw & ((~fpcw & 0x3f) | 0x40)) |
> > >       ((mxcsr & (mxcsr >> 16)) & 0x3f)]);
> > >
> > >Use the same trap handler that reads all these statuses and controls.
> > 
> > Changed my mind again.  Need sleep.  Merging the traps breaks the rule
> > that i387 traps occur on the first non-control instruction after the
> > one that causes the exception.  There may be mixed code like this:
> > 
> >     fldz
> >     fld1
> >     fdiv    %st,%st(1)      # i387 exception now; i387 trap pending
> >     load    $0 into %xmm0
> >     load    $1 into %xmm1
> >     divsd   %xmm0,%xmm1     # SSE exception now; SSE trap now
> > 
> > Debuggers can see both exception states including the i387 trap pending,
> > provided the i387 trap is not bogusly cleared, either by never clearing
> > it in the kernel trap handler or by using a separate trap handler that
> > doesn't clear it for T_XMMFLT.  They can even figure out that an SSE
> > trap occurred, because the i387 trap is still pending.
> > 
> >     ...
> >     fnop                    # i387 trap on first non-control FP instr...
> > 
> > Apart from doing the bogus fnclex for T_XMMFLT and the delayed effect of
> > i387 status bits, merging or not merging the statuses makes little
> > difference, since if a status bit is set and is not masked according
> > to its control word, then it will generate a trap soon if it didn't
> > genearate the current one.
> 
> The trap number is available for SA_SIGINFO type of handlers with
> si_trapno member of siginfo_t. I think this is final argument to have
> separate fputrap_{x87,sse} functions.
> 
> For amd64, SSE hardware is FPU, so I do not see much wrong with the name.
> 
> I changed fputrap_sse() according to your suggestion.

Below is the actually tested patch. After it, I put the test program.

diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index a7812b7..ae241ce 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$");
 #define        fxrstor(addr)           __asm __volatile("fxrstor %0" : : "m" 
(*(addr)))
 #define        fxsave(addr)            __asm __volatile("fxsave %0" : "=m" 
(*(addr)))
 #define        ldmxcsr(csr)            __asm __volatile("ldmxcsr %0" : : "m" 
(csr))
+#define        stmxcsr(addr)           __asm __volatile("stmxcsr %0" : : "m" 
(*(addr)))
 
 static __inline void
 xrstor(char *addr, uint64_t mask)
@@ -105,6 +106,7 @@ void        fnstsw(caddr_t addr);
 void   fxsave(caddr_t addr);
 void   fxrstor(caddr_t addr);
 void   ldmxcsr(u_int csr);
+void   stmxcsr(u_int csr);
 void   xrstor(char *addr, uint64_t mask);
 void   xsave(char *addr, uint64_t mask);
 
@@ -113,9 +115,6 @@ void        xsave(char *addr, uint64_t mask);
 #define        start_emulating()       load_cr0(rcr0() | CR0_TS)
 #define        stop_emulating()        clts()
 
-#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
-#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
-
 CTASSERT(sizeof(struct savefpu) == 512);
 CTASSERT(sizeof(struct xstate_hdr) == 64);
 CTASSERT(sizeof(struct savefpu_ymm) == 832);
@@ -514,11 +513,15 @@ static char fpetable[128] = {
 };
 
 /*
- * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
+ * Preserve the FP status word, clear FP exceptions for x87, then
+ * generate a SIGFPE.
+ *
+ * Clearing exceptions was necessary mainly to avoid IRQ13 bugs and is
+ * engraved in our i386 ABI.  We now depend on longjmp() restoring a
+ * usable state.  Restoring the state or examining it might fail if we
+ * didn't clear exceptions.
  *
- * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
- * depend on longjmp() restoring a usable state.  Restoring the state
- * or examining it might fail if we didn't clear exceptions.
+ * For SSE exceptions, the exceptions are not cleared.
  *
  * The error code chosen will be one of the FPE_... macros. It will be
  * sent as the second argument to old BSD-style signal handlers and as
@@ -531,8 +534,9 @@ static char fpetable[128] = {
  * solution for signals other than SIGFPE.
  */
 int
-fputrap()
+fputrap_x87(void)
 {
+       struct savefpu *pcb_save;
        u_short control, status;
 
        critical_enter();
@@ -543,19 +547,40 @@ fputrap()
         * wherever they are.
         */
        if (PCPU_GET(fpcurthread) != curthread) {
-               control = GET_FPU_CW(curthread);
-               status = GET_FPU_SW(curthread);
+               pcb_save = curthread->td_pcb->pcb_save;
+               control = pcb_save->sv_env.en_cw;
+               status = pcb_save->sv_env.en_sw;
        } else {
                fnstcw(&control);
                fnstsw(&status);
+               fnclex();
        }
 
-       if (PCPU_GET(fpcurthread) == curthread)
-               fnclex();
        critical_exit();
        return (fpetable[status & ((~control & 0x3f) | 0x40)]);
 }
 
+int
+fputrap_sse(void)
+{
+       u_int mxcsr;
+
+       critical_enter();
+
+       /*
+        * Coomparing with the x87 #MF handler, we do not clear
+        * exceptions from the mxcsr.
+        */
+       if (PCPU_GET(fpcurthread) != curthread)
+               mxcsr = curthread->td_pcb->pcb_save->sv_env.en_mxcsr;
+       else
+               stmxcsr(&mxcsr);
+
+       critical_exit();
+
+       return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
+}
+
 /*
  * Implement device not available (DNA) exception
  *
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 75e15e0..57d1cc2 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -328,7 +328,7 @@ trap(struct trapframe *frame)
                        break;
 
                case T_ARITHTRAP:       /* arithmetic trap */
-                       ucode = fputrap();
+                       ucode = fputrap_x87();
                        if (ucode == -1)
                                goto userout;
                        i = SIGFPE;
@@ -442,7 +442,9 @@ trap(struct trapframe *frame)
                        break;
 
                case T_XMMFLT:          /* SIMD floating-point exception */
-                       ucode = 0; /* XXX */
+                       ucode = fputrap_sse();
+                       if (ucode == -1)
+                               goto userout;
                        i = SIGFPE;
                        break;
                }
diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h
index 98a016b..7d0f0ea 100644
--- a/sys/amd64/include/fpu.h
+++ b/sys/amd64/include/fpu.h
@@ -62,7 +62,8 @@ int   fpusetregs(struct thread *td, struct savefpu *addr,
            char *xfpustate, size_t xfpustate_size);
 int    fpusetxstate(struct thread *td, char *xfpustate,
            size_t xfpustate_size);
-int    fputrap(void);
+int    fputrap_sse(void);
+int    fputrap_x87(void);
 void   fpuuserinited(struct thread *td);
 struct fpu_kern_ctx *fpu_kern_alloc_ctx(u_int flags);
 void   fpu_kern_free_ctx(struct fpu_kern_ctx *ctx);

fpuex.c:

/* $Id: fpuex.c,v 1.2 2012/07/17 18:22:54 kostik Exp kostik $ */
#include <sys/types.h>
#include <err.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ucontext.h>
#if defined(__amd64__)
#include <machine/fpu.h>
#elif defined(__i386__)
#include <machine/npx.h>
#endif

static void
handler(int signo __unused, siginfo_t *info, void *v)
{
        ucontext_t *uap;
        mcontext_t *mc;
#if defined(__amd64__)
        struct savefpu *sf;
#elif defined(__i386__)
        struct savexmm *sf;
#endif

        uap = v;
        mc = &uap->uc_mcontext;
        sf = &mc->mc_fpstate;
        printf("intr handler: trapno %d code %d cw 0x%04x sw 0x%04x mxcsr 
0x%08x ip 0x%lx\n",
            info->si_trapno, info->si_code,
#if defined(__amd64__)
            sf->sv_env.en_cw, sf->sv_env.en_sw, sf->sv_env.en_mxcsr,
            sf->sv_env.en_rip
#elif defined(__i386__)
            sf->sv_env.en_cw, sf->sv_env.en_sw,
            sf->sv_env.en_mxcsr, (unsigned long)sf->sv_env.en_fip
#endif
            );
        exit(0);
}

double a[3];

double x(void) __attribute__((noinline));
double
x(void)
{

        a[3] = a[0] / a[1];
        return (a[3]);
}

int
main(void)
{
        struct sigaction sa;
        uint32_t mxcsr;
        uint16_t cw;

        bzero(&sa, sizeof(sa));
        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO;
        if (sigaction(SIGFPE, &sa, NULL) == -1)
                err(1, "sigaction SIGFPE");

        mxcsr = 0;
        __asm __volatile("ldmxcsr %0" : : "m" (mxcsr));
#ifdef __i386__
        cw = 0;
        __asm __volatile("fldcw %0" : : "m" (cw));
#endif
        a[0] = 1.0;
        a[1] = 0.0;
        x();
        return (0);
}

Attachment: pgp4b4rbjKIuc.pgp
Description: PGP signature

Reply via email to