On Mon, 27 Dec 2010, Mark Kettenis wrote: ... > Looks like you're fooled by another not-so-subtle difference between > i386 and amd64. On amd64 the CPUs are spun up when they attach. On > i386 that doesn't happen until we call cpu_boot_secondary_processors() > in main(), which is long after npx(4) attaches and npxinit() gets called > on the primary CPU.
Ah, I missed the CPUF_GO bit. Gotta love the simplicity of the MP boot procedure. > > So no, I don't think it's safe to move up the npxinit() call into > > cpu_init(), unless you're suggesting that we delay initializing > > secondary cpus until after the isa bus is probed (barf). The > > consistent alternative would be to leave the fpuinit() call in > > cpu_hatch() and call it for the primary cpu near the end of > > init_x86_64(), that being the rough equivalent for the primary cpu. > > I think that is actually what I'd prefer. Okay, check out the revised diff below. I've tested the updated amd64 part; I would appreciate a confirmation from an i386 w/X11 user for that part. Philip Index: amd64/amd64/fpu.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v retrieving revision 1.21 diff -u -p -r1.21 fpu.c --- amd64/amd64/fpu.c 29 Sep 2010 15:11:31 -0000 1.21 +++ amd64/amd64/fpu.c 27 Dec 2010 20:05:56 -0000 @@ -95,6 +95,8 @@ void fpudna(struct cpu_info *); static int x86fpflags_to_siginfo(u_int32_t); +uint32_t fpu_mxcsr_mask; + /* * Init the FPU. */ @@ -103,6 +105,16 @@ fpuinit(struct cpu_info *ci) { lcr0(rcr0() & ~(CR0_EM|CR0_TS)); fninit(); + if (fpu_mxcsr_mask == 0) { + struct fxsave64 fx __attribute__((aligned(16))); + + bzero(&fx, sizeof(fx)); + fxsave(&fx); + if (fx.fx_mxcsr_mask) + fpu_mxcsr_mask = fx.fx_mxcsr_mask; + else + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__; + } lcr0(rcr0() | (CR0_TS)); } Index: amd64/amd64/machdep.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v retrieving revision 1.129 diff -u -p -r1.129 machdep.c --- amd64/amd64/machdep.c 22 Nov 2010 21:07:16 -0000 1.129 +++ amd64/amd64/machdep.c 27 Dec 2010 20:05:56 -0000 @@ -658,9 +658,11 @@ sys_sigreturn(struct proc *p, void *v, r fpusave_proc(p, 0); if (ksc.sc_fpstate) { - if ((error = copyin(ksc.sc_fpstate, - &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, sizeof (struct fxsave64)))) + struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave; + + if ((error = copyin(ksc.sc_fpstate, fx, sizeof(*fx)))) return (error); + fx->fx_mxcsr &= fpu_mxcsr_mask; p->p_md.md_flags |= MDP_USEDFPU; } @@ -1506,6 +1508,7 @@ init_x86_64(paddr_t first_avail) cpu_init_idt(); intr_default_setup(); + fpuinit(&cpu_info_primary); softintr_init(); splraise(IPL_IPI); Index: amd64/amd64/process_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v retrieving revision 1.9 diff -u -p -r1.9 process_machdep.c --- amd64/amd64/process_machdep.c 29 Sep 2010 15:11:31 -0000 1.9 +++ amd64/amd64/process_machdep.c 27 Dec 2010 20:05:56 -0000 @@ -137,7 +137,7 @@ process_read_fpregs(struct proc *p, stru frame->fx_fsw = 0x0000; frame->fx_ftw = 0xff; frame->fx_mxcsr = __INITIAL_MXCSR__; - frame->fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; + frame->fx_mxcsr_mask = fpu_mxcsr_mask; p->p_md.md_flags |= MDP_USEDFPU; } @@ -198,6 +198,7 @@ process_write_fpregs(struct proc *p, str } memcpy(frame, ®s->fxstate, sizeof(*regs)); + frame->fx_mxcsr &= fpu_mxcsr_mask; return (0); } Index: amd64/include/fpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v retrieving revision 1.7 diff -u -p -r1.7 fpu.h --- amd64/include/fpu.h 20 Nov 2010 20:11:17 -0000 1.7 +++ amd64/include/fpu.h 27 Dec 2010 20:05:56 -0000 @@ -49,6 +49,8 @@ struct savefpu { struct trapframe; struct cpu_info; +extern uint32_t fpu_mxcsr_mask; + void fpuinit(struct cpu_info *); void fpudrop(void); void fpudiscard(struct proc *); Index: i386/i386/machdep.c =================================================================== RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v retrieving revision 1.485 diff -u -p -r1.485 machdep.c --- i386/i386/machdep.c 2 Oct 2010 23:31:34 -0000 1.485 +++ i386/i386/machdep.c 27 Dec 2010 20:05:57 -0000 @@ -2362,9 +2362,12 @@ sys_sigreturn(struct proc *p, void *v, r npxsave_proc(p, 0); if (context.sc_fpstate) { - if ((error = copyin(context.sc_fpstate, - &p->p_addr->u_pcb.pcb_savefpu, sizeof (union savefpu)))) + union savefpu *sfp = &p->p_addr->u_pcb.pcb_savefpu; + + if ((error = copyin(context.sc_fpstate, sfp, sizeof(*sfp)))) return (error); + if (i386_use_fxsave) + sfp->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask; p->p_md.md_flags |= MDP_USEDFPU; } Index: i386/i386/process_machdep.c =================================================================== RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v retrieving revision 1.26 diff -u -p -r1.26 process_machdep.c --- i386/i386/process_machdep.c 27 Dec 2010 12:48:35 -0000 1.26 +++ i386/i386/process_machdep.c 27 Dec 2010 20:05:57 -0000 @@ -137,6 +137,7 @@ process_fninit_xmm(struct savexmm *sxmm) memset(sxmm, 0, sizeof(*sxmm)); sxmm->sv_env.en_cw = __OpenBSD_NPXCW__; sxmm->sv_env.en_mxcsr = __INITIAL_MXCSR__; + sxmm->sv_env.en_mxcsr_mask = fpu_mxcsr_mask; sxmm->sv_env.en_sw = 0x0000; sxmm->sv_env.en_tw = 0x00; } @@ -359,6 +360,7 @@ process_write_xmmregs(struct proc *p, co p->p_md.md_flags |= MDP_USEDFPU; memcpy(&frame->sv_xmm, regs, sizeof(*regs)); + frame->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask; return (0); } Index: i386/include/npx.h =================================================================== RCS file: /cvs/src/sys/arch/i386/include/npx.h,v retrieving revision 1.15 diff -u -p -r1.15 npx.h --- i386/include/npx.h 29 Sep 2010 15:11:31 -0000 1.15 +++ i386/include/npx.h 27 Dec 2010 20:05:58 -0000 @@ -96,7 +96,7 @@ struct envxmm { uint16_t en_fos; /* FPU Data pointer selector */ uint16_t en_rsvd2; uint32_t en_mxcsr; /* MXCSR Register State */ - uint32_t en_rsvd3; + uint32_t en_mxcsr_mask; /* Mask for valid MXCSR bits (may be 0) */ }; /* FPU regsters in the extended save format. */ @@ -142,6 +142,7 @@ struct emcsts { * Set Reference, pg. 3-369. */ #define __INITIAL_MXCSR__ 0x1f80 +#define __INITIAL_MXCSR_MASK__ 0xffbf /* * The standard control word from finit is 0x37F, giving: @@ -157,6 +158,8 @@ void process_s87_to_xmm(const struct struct cpu_info; struct trapframe; + +extern unsigned int fpu_mxcsr_mask; void npxinit(struct cpu_info *); void npxtrap(struct trapframe *); Index: i386/isa/npx.c =================================================================== RCS file: /cvs/src/sys/arch/i386/isa/npx.c,v retrieving revision 1.53 diff -u -p -r1.53 npx.c --- i386/isa/npx.c 29 Sep 2010 15:11:31 -0000 1.53 +++ i386/isa/npx.c 27 Dec 2010 20:05:58 -0000 @@ -97,6 +97,11 @@ #define clts() __asm("clts") #define stts() lcr0(rcr0() | CR0_TS) +/* + * The mxcsr_mask for this host, taken from fxsave() on the primary CPU + */ +unsigned int fpu_mxcsr_mask; + int npxintr(void *); static int npxprobe1(struct isa_attach_args *); static int x86fpflags_to_siginfo(u_int32_t); @@ -350,6 +355,16 @@ npxinit(struct cpu_info *ci) i386_fpu_fdivbug = 1; printf("%s: WARNING: Pentium FDIV bug detected!\n", ci->ci_dev.dv_xname); + } + if (fpu_mxcsr_mask == 0 && i386_use_fxsave) { + struct savexmm xm __attribute__((aligned(16))); + + bzero(&xm, sizeof(xm)); + fxsave(&xm); + if (xm.sv_env.en_mxcsr_mask) + fpu_mxcsr_mask = xm.sv_env.en_mxcsr_mask; + else + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__; } lcr0(rcr0() | (CR0_TS)); }