Re: correct mxcsr+mxcsr_mask handling (revised)

2010-12-27 Thread Josh Elsasser
On Mon, Dec 27, 2010 at 12:07:55PM -0800, Philip Guenther wrote:
> Okay, check out the revised diff below.  I've tested the updated amd64 
> part; I would appreciate a confirmation from an i386 w/X11 user for that 
> part.

Works for me, X runs and test program no longer drops us into ddb:

OpenBSD 4.8-current (GENERIC) #43: Mon Dec 27 13:08:01 PST 2010
jo...@flint.joshe.fake:/usr/src/sys/arch/i386/compile/GENERIC
cpu0: Intel(R) Celeron(R) M processor 1.40GHz ("GenuineIntel" 686-class) 1.40 
GHz
cpu0: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,SEP,MTRR,PGE,MCA,CMOV,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,TM,SBF

It couldn't hurt to also test this on a machine where the fxsave
instruction writes an empty mask. This should be any machine with SSE
is in the cpu flags list and without SSE2.

ok joshe@

> Index: amd64/amd64/fpu.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 fpu.c
> --- amd64/amd64/fpu.c 29 Sep 2010 15:11:31 -  1.21
> +++ amd64/amd64/fpu.c 27 Dec 2010 20:05:56 -
> @@ -95,6 +95,8 @@
>  void fpudna(struct cpu_info *);
>  static int x86fpflags_to_siginfo(u_int32_t);
>  
> +uint32_t fpu_mxcsr_mask;
> +
>  /*
>   * Init the FPU.
>   */
> @@ -103,6 +105,16 @@ fpuinit(struct cpu_info *ci)
>  {
>   lcr0(rcr0() & ~(CR0_EM|CR0_TS));
>   fninit();
> + if (fpu_mxcsr_mask == 0) {
> + struct fxsave64 fx __attribute__((aligned(16)));
> +
> + bzero(&fx, sizeof(fx));
> + fxsave(&fx);
> + if (fx.fx_mxcsr_mask)
> + fpu_mxcsr_mask = fx.fx_mxcsr_mask;
> + else
> + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
> + }
>   lcr0(rcr0() | (CR0_TS));
>  }
>  
> Index: amd64/amd64/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 machdep.c
> --- amd64/amd64/machdep.c 22 Nov 2010 21:07:16 -  1.129
> +++ amd64/amd64/machdep.c 27 Dec 2010 20:05:56 -
> @@ -658,9 +658,11 @@ sys_sigreturn(struct proc *p, void *v, r
>   fpusave_proc(p, 0);
>  
>   if (ksc.sc_fpstate) {
> - if ((error = copyin(ksc.sc_fpstate,
> - &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, sizeof (struct 
> fxsave64
> + struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
> +
> + if ((error = copyin(ksc.sc_fpstate, fx, sizeof(*fx
>   return (error);
> + fx->fx_mxcsr &= fpu_mxcsr_mask;
>   p->p_md.md_flags |= MDP_USEDFPU;
>   }
>  
> @@ -1506,6 +1508,7 @@ init_x86_64(paddr_t first_avail)
>   cpu_init_idt();
>  
>   intr_default_setup();
> + fpuinit(&cpu_info_primary);
>  
>   softintr_init();
>   splraise(IPL_IPI);
> Index: amd64/amd64/process_machdep.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 process_machdep.c
> --- amd64/amd64/process_machdep.c 29 Sep 2010 15:11:31 -  1.9
> +++ amd64/amd64/process_machdep.c 27 Dec 2010 20:05:56 -
> @@ -137,7 +137,7 @@ process_read_fpregs(struct proc *p, stru
>   frame->fx_fsw = 0x;
>   frame->fx_ftw = 0xff;
>   frame->fx_mxcsr = __INITIAL_MXCSR__;
> - frame->fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
> + frame->fx_mxcsr_mask = fpu_mxcsr_mask;
>   p->p_md.md_flags |= MDP_USEDFPU;
>   }
>  
> @@ -198,6 +198,7 @@ process_write_fpregs(struct proc *p, str
>   }
>  
>   memcpy(frame, ®s->fxstate, sizeof(*regs));
> + frame->fx_mxcsr &= fpu_mxcsr_mask;
>   return (0);
>  }
>  
> Index: amd64/include/fpu.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
> retrieving revision 1.7
> diff -u -p -r1.7 fpu.h
> --- amd64/include/fpu.h   20 Nov 2010 20:11:17 -  1.7
> +++ amd64/include/fpu.h   27 Dec 2010 20:05:56 -
> @@ -49,6 +49,8 @@ struct savefpu {
>  struct trapframe;
>  struct cpu_info;
>  
> +extern uint32_t  fpu_mxcsr_mask;
> +
>  void fpuinit(struct cpu_info *);
>  void fpudrop(void);
>  void fpudiscard(struct proc *);
> Index: i386/i386/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.485
> diff -u -p -r1.485 machdep.c
> --- i386/i386/machdep.c   2 Oct 2010 23:31:34 -   1.485
> +++ i386/i386/machdep.c   27 Dec 2010 20:05:57 -
> @@ -2362,9 +2362,12 @@ sys_sigreturn(struct proc *p, void *v, r
>   npxsave_proc(p, 0);
>  
>   if (context.sc_fpstate) {
> - if ((error = copyin(context.sc_fpstate,
> -

Re: correct mxcsr+mxcsr_mask handling (revised)

2010-12-27 Thread Philip Guenther
On Mon, 27 Dec 2010, Mark Kettenis wrote:
...
> Looks like you're fooled by another not-so-subtle difference between 
> i386 and amd64.  On amd64 the CPUs are spun up when they attach.  On 
> i386 that doesn't happen until we call cpu_boot_secondary_processors() 
> in main(), which is long after npx(4) attaches and npxinit() gets called 
> on the primary CPU.

Ah, I missed the CPUF_GO bit.  Gotta love the simplicity of the MP boot 
procedure.


> > So no, I don't think it's safe to move up the npxinit() call into
> > cpu_init(), unless you're suggesting that we delay initializing
> > secondary cpus until after the isa bus is probed (barf).  The
> > consistent alternative would be to leave the fpuinit() call in
> > cpu_hatch() and call it for the primary cpu near the end of
> > init_x86_64(), that being the rough equivalent for the primary cpu.
> 
> I think that is actually what I'd prefer.

Okay, check out the revised diff below.  I've tested the updated amd64 
part; I would appreciate a confirmation from an i386 w/X11 user for that 
part.


Philip


Index: amd64/amd64/fpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
retrieving revision 1.21
diff -u -p -r1.21 fpu.c
--- amd64/amd64/fpu.c   29 Sep 2010 15:11:31 -  1.21
+++ amd64/amd64/fpu.c   27 Dec 2010 20:05:56 -
@@ -95,6 +95,8 @@
 void fpudna(struct cpu_info *);
 static int x86fpflags_to_siginfo(u_int32_t);
 
+uint32_t   fpu_mxcsr_mask;
+
 /*
  * Init the FPU.
  */
@@ -103,6 +105,16 @@ fpuinit(struct cpu_info *ci)
 {
lcr0(rcr0() & ~(CR0_EM|CR0_TS));
fninit();
+   if (fpu_mxcsr_mask == 0) {
+   struct fxsave64 fx __attribute__((aligned(16)));
+
+   bzero(&fx, sizeof(fx));
+   fxsave(&fx);
+   if (fx.fx_mxcsr_mask)
+   fpu_mxcsr_mask = fx.fx_mxcsr_mask;
+   else
+   fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+   }
lcr0(rcr0() | (CR0_TS));
 }
 
Index: amd64/amd64/machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.129
diff -u -p -r1.129 machdep.c
--- amd64/amd64/machdep.c   22 Nov 2010 21:07:16 -  1.129
+++ amd64/amd64/machdep.c   27 Dec 2010 20:05:56 -
@@ -658,9 +658,11 @@ sys_sigreturn(struct proc *p, void *v, r
fpusave_proc(p, 0);
 
if (ksc.sc_fpstate) {
-   if ((error = copyin(ksc.sc_fpstate,
-   &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, sizeof (struct 
fxsave64
+   struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
+
+   if ((error = copyin(ksc.sc_fpstate, fx, sizeof(*fx
return (error);
+   fx->fx_mxcsr &= fpu_mxcsr_mask;
p->p_md.md_flags |= MDP_USEDFPU;
}
 
@@ -1506,6 +1508,7 @@ init_x86_64(paddr_t first_avail)
cpu_init_idt();
 
intr_default_setup();
+   fpuinit(&cpu_info_primary);
 
softintr_init();
splraise(IPL_IPI);
Index: amd64/amd64/process_machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v
retrieving revision 1.9
diff -u -p -r1.9 process_machdep.c
--- amd64/amd64/process_machdep.c   29 Sep 2010 15:11:31 -  1.9
+++ amd64/amd64/process_machdep.c   27 Dec 2010 20:05:56 -
@@ -137,7 +137,7 @@ process_read_fpregs(struct proc *p, stru
frame->fx_fsw = 0x;
frame->fx_ftw = 0xff;
frame->fx_mxcsr = __INITIAL_MXCSR__;
-   frame->fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+   frame->fx_mxcsr_mask = fpu_mxcsr_mask;
p->p_md.md_flags |= MDP_USEDFPU;
}
 
@@ -198,6 +198,7 @@ process_write_fpregs(struct proc *p, str
}
 
memcpy(frame, ®s->fxstate, sizeof(*regs));
+   frame->fx_mxcsr &= fpu_mxcsr_mask;
return (0);
 }
 
Index: amd64/include/fpu.h
===
RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
retrieving revision 1.7
diff -u -p -r1.7 fpu.h
--- amd64/include/fpu.h 20 Nov 2010 20:11:17 -  1.7
+++ amd64/include/fpu.h 27 Dec 2010 20:05:56 -
@@ -49,6 +49,8 @@ struct savefpu {
 struct trapframe;
 struct cpu_info;
 
+extern uint32_tfpu_mxcsr_mask;
+
 void fpuinit(struct cpu_info *);
 void fpudrop(void);
 void fpudiscard(struct proc *);
Index: i386/i386/machdep.c
===
RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
retrieving revision 1.485
diff -u -p -r1.485 machdep.c
--- i386/i386/machdep.c 2 Oct 2010 23:31:34 -   1.485
+++ i386/i386/machdep.c 27 Dec 2010 20:05:57 -
@@ -2362,9 +2362,12 @@ sys_sigreturn(struct proc *p, void *v, r
npxsave_proc(p, 0);

Re: correct mxcsr+mxcsr_mask handling (revised)

2010-12-27 Thread Mark Kettenis
> Date: Sun, 26 Dec 2010 21:57:21 -0800
> From: Philip Guenther 
> 
> On Sun, Dec 26, 2010 at 7:24 AM, Mark Kettenis  
> wrote:
> >> Originally worked out by joshe@; this corrects the timing of the call to
> >> fpuinit() for the primary CPU on amd64 so that the saved mask value is
> >> initialized correctly.
> >
> > Hmm, I'm not too happy about moving fpuinit() out of cpu_hatch() on
> > amd64, while leaving npxinit() in there for i386.
> 
> Well, on i386, npxinit() is called for the primary cpu when the npx is
> attached, which is way after the secondary cpus are attached.  Can we
> assume it to be safe to call npxinit() before the npx device is
> attached?  It wouldn't surprise me to hear that it only works when npx
> errors are reported using exceptions and not ISA interrupts...which
> would explain why the early calls are fine on multi-CPU systems,
> 'cause they all use exceptions.

Looks like you're fooled by another not-so-subtle difference between
i386 and amd64.  On amd64 the CPUs are spun up when they attach.  On
i386 that doesn't happen until we call cpu_boot_secondary_processors()
in main(), which is long after npx(4) attaches and npxinit() gets
called on the primary CPU.

> So no, I don't think it's safe to move up the npxinit() call into
> cpu_init(), unless you're suggesting that we delay initializing
> secondary cpus until after the isa bus is probed (barf).  The
> consistent alternative would be to leave the fpuinit() call in
> cpu_hatch() and call it for the primary cpu near the end of
> init_x86_64(), that being the rough equivalent for the primary cpu.

I think that is actually what I'd prefer.

> >> + if (CPU_IS_PRIMARY(ci)) {
> >> + struct fxsave64 fx __attribute__((aligned(16)));
> >> +
> >> + bzero(&fx, sizeof(fx));
> >> + fxsave(&fx);
> >> + if (fx.fx_mxcsr_mask)
> >> + fpu_mxcsr_mask = fx.fx_mxcsr_mask;
> >> + else
> >> + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
> >> + }
> >
> > This function will be run again upon resume.  Now overwriting
> > fpu_mxcsr_mask shouldn't hurt, but perhaps replacing:
> >
> >if (CPU_IS_PRIMARY(ci)) {
> >
> > with
> >
> >if (fpu_mxcsr_mask == 0) {
> >
> > would be better?
> 
> No.  The primary cpu check is necessary, at least on i386, because
> secondary cpus call npxinit() before the primary cpu *and* before
> setting CR4_OSFXSR.

See my comment above.

> >> Index: sys/arch/i386/i386/process_machdep.c
> >> ===
> >> RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v
> >> retrieving revision 1.25
> >> diff -u -p -r1.25 process_machdep.c
> >> --- sys/arch/i386/i386/process_machdep.c  29 Sep 2010 15:11:31 -   
> >>1.25
> >> +++ sys/arch/i386/i386/process_machdep.c  25 Dec 2010 19:38:52 -
> >> @@ -308,6 +309,7 @@ process_write_fpregs(struct proc *p, str
> >>   /* XXX Yuck. */
> >>   memcpy(&s87, regs, sizeof(*regs));
> >>   process_s87_to_xmm(&s87, &frame->sv_xmm);
> >> + frame->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask;
> >>   } else
> >>   memcpy(&frame->sv_87, regs, sizeof(*regs));
> >
> > Oh, this actually points out an interesting problem.  The MXCSR
> > register isn't initialized from data passed to us by userland.
> > However, if a user calls ptrace(PT_SETFPREGS, ...) on a process that
> > didn't use the FPU yet, it isn't quite clear what we initialize the
> > XMM state to.  While your approach will make sure we won't cause a
> > segfault, we might still be leaking stuff from the kernel to userland.
> > I think the diff below is a better way to address the issue.
> >
> > Index: process_machdep.c
> > ===
> > RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v
> > retrieving revision 1.25
> > diff -u -p -r1.25 process_machdep.c
> > --- process_machdep.c   29 Sep 2010 15:11:31 -  1.25
> > +++ process_machdep.c   26 Dec 2010 15:23:33 -
> > @@ -299,8 +299,15 @@ process_write_fpregs(struct proc *p, str
> >  #if NNPX > 0
> >npxsave_proc(p, 0);
> >  #endif
> > -   } else
> > +   } else {
> > +   /*
> > +* Make sure MXCSR and the XMM registers are
> > +* initialized to sane defaults.
> > +*/
> > +   if (i386_use_fxsave)
> > +   process_fninit_xmm(&frame->sv_xmm);
> >p->p_md.md_flags |= MDP_USEDFPU;
> > +   }
> >
> >if (i386_use_fxsave) {
> >struct save87 s87;
> 
> Yes, I agree.  IMO, that can be committed separately (ok guenther@)
> from this mxcsr_mask diff.

Ok, done.

> What this really points out, however, is that the masking added by my
> diff was in the wrong function: it should have been in
> process_write_xmmregs(), duh.  I'll fix that in the 

Re: correct mxcsr+mxcsr_mask handling (revised)

2010-12-26 Thread Philip Guenther
On Sun, Dec 26, 2010 at 7:24 AM, Mark Kettenis 
wrote:
>> Originally worked out by joshe@; this corrects the timing of the call to
>> fpuinit() for the primary CPU on amd64 so that the saved mask value is
>> initialized correctly.
>
> Hmm, I'm not too happy about moving fpuinit() out of cpu_hatch() on
> amd64, while leaving npxinit() in there for i386.

Well, on i386, npxinit() is called for the primary cpu when the npx is
attached, which is way after the secondary cpus are attached.  Can we
assume it to be safe to call npxinit() before the npx device is
attached?  It wouldn't surprise me to hear that it only works when npx
errors are reported using exceptions and not ISA interrupts...which
would explain why the early calls are fine on multi-CPU systems,
'cause they all use exceptions.

So no, I don't think it's safe to move up the npxinit() call into
cpu_init(), unless you're suggesting that we delay initializing
secondary cpus until after the isa bus is probed (barf).  The
consistent alternative would be to leave the fpuinit() call in
cpu_hatch() and call it for the primary cpu near the end of
init_x86_64(), that being the rough equivalent for the primary cpu.


>> + if (CPU_IS_PRIMARY(ci)) {
>> + struct fxsave64 fx __attribute__((aligned(16)));
>> +
>> + bzero(&fx, sizeof(fx));
>> + fxsave(&fx);
>> + if (fx.fx_mxcsr_mask)
>> + fpu_mxcsr_mask = fx.fx_mxcsr_mask;
>> + else
>> + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
>> + }
>
> This function will be run again upon resume.  Now overwriting
> fpu_mxcsr_mask shouldn't hurt, but perhaps replacing:
>
>if (CPU_IS_PRIMARY(ci)) {
>
> with
>
>if (fpu_mxcsr_mask == 0) {
>
> would be better?

No.  The primary cpu check is necessary, at least on i386, because
secondary cpus call npxinit() before the primary cpu *and* before
setting CR4_OSFXSR.

(Also, you're not planning on hot-upgrading your FPU by replacing the
CPU while it's asleep?!)


>> Index: sys/arch/i386/i386/process_machdep.c
>> ===
>> RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v
>> retrieving revision 1.25
>> diff -u -p -r1.25 process_machdep.c
>> --- sys/arch/i386/i386/process_machdep.c  29 Sep 2010 15:11:31 -  
   1.25
>> +++ sys/arch/i386/i386/process_machdep.c  25 Dec 2010 19:38:52 -
>> @@ -308,6 +309,7 @@ process_write_fpregs(struct proc *p, str
>>   /* XXX Yuck. */
>>   memcpy(&s87, regs, sizeof(*regs));
>>   process_s87_to_xmm(&s87, &frame->sv_xmm);
>> + frame->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask;
>>   } else
>>   memcpy(&frame->sv_87, regs, sizeof(*regs));
>
> Oh, this actually points out an interesting problem.  The MXCSR
> register isn't initialized from data passed to us by userland.
> However, if a user calls ptrace(PT_SETFPREGS, ...) on a process that
> didn't use the FPU yet, it isn't quite clear what we initialize the
> XMM state to.  While your approach will make sure we won't cause a
> segfault, we might still be leaking stuff from the kernel to userland.
> I think the diff below is a better way to address the issue.
>
> Index: process_machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 process_machdep.c
> --- process_machdep.c   29 Sep 2010 15:11:31 -  1.25
> +++ process_machdep.c   26 Dec 2010 15:23:33 -
> @@ -299,8 +299,15 @@ process_write_fpregs(struct proc *p, str
>  #if NNPX > 0
>npxsave_proc(p, 0);
>  #endif
> -   } else
> +   } else {
> +   /*
> +* Make sure MXCSR and the XMM registers are
> +* initialized to sane defaults.
> +*/
> +   if (i386_use_fxsave)
> +   process_fninit_xmm(&frame->sv_xmm);
>p->p_md.md_flags |= MDP_USEDFPU;
> +   }
>
>if (i386_use_fxsave) {
>struct save87 s87;

Yes, I agree.  IMO, that can be committed separately (ok guenther@)
from this mxcsr_mask diff.

What this really points out, however, is that the masking added by my
diff was in the wrong function: it should have been in
process_write_xmmregs(), duh.  I'll fix that in the next rev.

Once we agree where to npxinit()/fpuinit(), I'll post an updated diff.


Philip Guenther



Re: correct mxcsr+mxcsr_mask handling (revised)

2010-12-26 Thread Mark Kettenis
> Originally worked out by joshe@; this corrects the timing of the call to 
> fpuinit() for the primary CPU on amd64 so that the saved mask value is 
> initialized correctly.

Hmm, I'm not too happy about moving fpuinit() out of cpu_hatch() on
amd64, while leaving npxinit() in there for i386.

Also:

> Index: sys/arch/amd64/amd64/cpu.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.40
> diff -u -p -r1.40 cpu.c
> --- sys/arch/amd64/amd64/cpu.c27 Nov 2010 13:03:04 -  1.40
> +++ sys/arch/amd64/amd64/cpu.c25 Dec 2010 19:38:48 -
> @@ -103,6 +105,16 @@ fpuinit(struct cpu_info *ci)
>  {
>   lcr0(rcr0() & ~(CR0_EM|CR0_TS));
>   fninit();
> + if (CPU_IS_PRIMARY(ci)) {
> + struct fxsave64 fx __attribute__((aligned(16)));
> +
> + bzero(&fx, sizeof(fx));
> + fxsave(&fx);
> + if (fx.fx_mxcsr_mask)
> + fpu_mxcsr_mask = fx.fx_mxcsr_mask;
> + else
> + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
> + }
>   lcr0(rcr0() | (CR0_TS));
>  }

This function will be run again upon resume.  Now overwriting
fpu_mxcsr_mask shouldn't hurt, but perhaps replacing:

if (CPU_IS_PRIMARY(ci)) {

with

if (fpu_mxcsr_mask == 0) {

would be better?

> Index: sys/arch/i386/i386/process_machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 process_machdep.c
> --- sys/arch/i386/i386/process_machdep.c  29 Sep 2010 15:11:31 -  
> 1.25
> +++ sys/arch/i386/i386/process_machdep.c  25 Dec 2010 19:38:52 -
> @@ -308,6 +309,7 @@ process_write_fpregs(struct proc *p, str
>   /* XXX Yuck. */
>   memcpy(&s87, regs, sizeof(*regs));
>   process_s87_to_xmm(&s87, &frame->sv_xmm);
> + frame->sv_xmm.sv_env.en_mxcsr &= fpu_mxcsr_mask;
>   } else
>   memcpy(&frame->sv_87, regs, sizeof(*regs));

Oh, this actually points out an interesting problem.  The MXCSR
register isn't initialized from data passed to us by userland.
However, if a user calls ptrace(PT_SETFPREGS, ...) on a process that
didn't use the FPU yet, it isn't quite clear what we initialize the
XMM state to.  While your approach will make sure we won't cause a
segfault, we might still be leaking stuff from the kernel to userland.
I think the diff below is a better way to address the issue.

Index: process_machdep.c
===
RCS file: /cvs/src/sys/arch/i386/i386/process_machdep.c,v
retrieving revision 1.25
diff -u -p -r1.25 process_machdep.c
--- process_machdep.c   29 Sep 2010 15:11:31 -  1.25
+++ process_machdep.c   26 Dec 2010 15:23:33 -
@@ -299,8 +299,15 @@ process_write_fpregs(struct proc *p, str
 #if NNPX > 0
npxsave_proc(p, 0);
 #endif
-   } else
+   } else {
+   /*
+* Make sure MXCSR and the XMM registers are
+* initialized to sane defaults.
+*/
+   if (i386_use_fxsave)
+   process_fninit_xmm(&frame->sv_xmm);
p->p_md.md_flags |= MDP_USEDFPU;
+   }
 
if (i386_use_fxsave) {
struct save87 s87;



Re: correct mxcsr+mxcsr_mask handling (revised)

2010-12-26 Thread Kenneth R Westerback
On Sat, Dec 25, 2010 at 12:02:58PM -0800, Philip Guenther wrote:
> Here's a revised diff to correct the handling on amd64 and i386 of the 
> MXCSR register in frames that could be altered by users, so that a user 
> can't trick the kernel into faulting by trying to load an invalid MXCSR 
> value during the return to userspace.
> 
> Originally worked out by joshe@; this corrects the timing of the call to 
> fpuinit() for the primary CPU on amd64 so that the saved mask value is 
> initialized correctly.  It also uniformly uses 
> __attribute__((aligned(16))) do get a buffer of the correct alignment.
> 
> I've been using this on amd64 for a while and have confirmed that a test 
> program which previous crashed the box now continues just fine.  I'm away 
> from my i386 test box so we could use confirmation from someone running an 
> i386 that shows FXSR in the cpu0 flags list.
> 
> ok?
> 
> 
> Philip Guenther
> 
> Index: sys/arch/amd64/amd64/cpu.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.40
> diff -u -p -r1.40 cpu.c
> --- sys/arch/amd64/amd64/cpu.c27 Nov 2010 13:03:04 -  1.40
> +++ sys/arch/amd64/amd64/cpu.c25 Dec 2010 19:38:48 -
> @@ -365,6 +365,8 @@ cpu_attach(struct device *parent, struct
>  void
>  cpu_init(struct cpu_info *ci)
>  {
> + fpuinit(ci);
> +
>   /* configure the CPU if needed */
>   if (ci->cpu_setup != NULL)
>   (*ci->cpu_setup)(ci);
> @@ -509,7 +511,6 @@ cpu_hatch(void *v)
>   cpu_init_idt();
>   lapic_set_lvt();
>   gdt_init_cpu(ci);
> - fpuinit(ci);
>  
>   lldt(0);
>  
> Index: sys/arch/amd64/amd64/fpu.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 fpu.c
> --- sys/arch/amd64/amd64/fpu.c29 Sep 2010 15:11:31 -  1.21
> +++ sys/arch/amd64/amd64/fpu.c25 Dec 2010 19:38:48 -
> @@ -95,6 +95,8 @@
>  void fpudna(struct cpu_info *);
>  static int x86fpflags_to_siginfo(u_int32_t);
>  
> +uint32_t fpu_mxcsr_mask;
> +
>  /*
>   * Init the FPU.
>   */
> @@ -103,6 +105,16 @@ fpuinit(struct cpu_info *ci)
>  {
>   lcr0(rcr0() & ~(CR0_EM|CR0_TS));
>   fninit();
> + if (CPU_IS_PRIMARY(ci)) {
> + struct fxsave64 fx __attribute__((aligned(16)));
> +
> + bzero(&fx, sizeof(fx));
> + fxsave(&fx);
> + if (fx.fx_mxcsr_mask)
> + fpu_mxcsr_mask = fx.fx_mxcsr_mask;
> + else
> + fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
> + }
>   lcr0(rcr0() | (CR0_TS));
>  }
>  
> Index: sys/arch/amd64/amd64/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 machdep.c
> --- sys/arch/amd64/amd64/machdep.c22 Nov 2010 21:07:16 -  1.129
> +++ sys/arch/amd64/amd64/machdep.c25 Dec 2010 19:38:49 -
> @@ -658,9 +658,11 @@ sys_sigreturn(struct proc *p, void *v, r
>   fpusave_proc(p, 0);
>  
>   if (ksc.sc_fpstate) {
> - if ((error = copyin(ksc.sc_fpstate,
> - &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, sizeof (struct 
> fxsave64
> + struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
> +
> + if ((error = copyin(ksc.sc_fpstate, fx, sizeof(*fx
>   return (error);
> + fx->fx_mxcsr &= fpu_mxcsr_mask;
>   p->p_md.md_flags |= MDP_USEDFPU;
>   }
>  
> Index: sys/arch/amd64/amd64/process_machdep.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 process_machdep.c
> --- sys/arch/amd64/amd64/process_machdep.c29 Sep 2010 15:11:31 -  
> 1.9
> +++ sys/arch/amd64/amd64/process_machdep.c25 Dec 2010 19:38:50 -
> @@ -137,7 +137,7 @@ process_read_fpregs(struct proc *p, stru
>   frame->fx_fsw = 0x;
>   frame->fx_ftw = 0xff;
>   frame->fx_mxcsr = __INITIAL_MXCSR__;
> - frame->fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
> + frame->fx_mxcsr_mask = fpu_mxcsr_mask;
>   p->p_md.md_flags |= MDP_USEDFPU;
>   }
>  
> @@ -198,6 +198,7 @@ process_write_fpregs(struct proc *p, str
>   }
>  
>   memcpy(frame, ®s->fxstate, sizeof(*regs));
> + frame->fx_mxcsr &= fpu_mxcsr_mask;
>   return (0);
>  }
>  
> Index: sys/arch/amd64/include/fpu.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
> retrieving revision 1.7
> diff -u -p -r1.7 fpu.h
> --- sys/arch/amd64/include/fpu.h  20 Nov 2010 20:11:17 -  1.7
> +++ sys/arch/amd64/include/fpu.h  25 

correct mxcsr+mxcsr_mask handling (revised)

2010-12-25 Thread Philip Guenther
Here's a revised diff to correct the handling on amd64 and i386 of the 
MXCSR register in frames that could be altered by users, so that a user 
can't trick the kernel into faulting by trying to load an invalid MXCSR 
value during the return to userspace.

Originally worked out by joshe@; this corrects the timing of the call to 
fpuinit() for the primary CPU on amd64 so that the saved mask value is 
initialized correctly.  It also uniformly uses 
__attribute__((aligned(16))) do get a buffer of the correct alignment.

I've been using this on amd64 for a while and have confirmed that a test 
program which previous crashed the box now continues just fine.  I'm away 
from my i386 test box so we could use confirmation from someone running an 
i386 that shows FXSR in the cpu0 flags list.

ok?


Philip Guenther

Index: sys/arch/amd64/amd64/cpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.40
diff -u -p -r1.40 cpu.c
--- sys/arch/amd64/amd64/cpu.c  27 Nov 2010 13:03:04 -  1.40
+++ sys/arch/amd64/amd64/cpu.c  25 Dec 2010 19:38:48 -
@@ -365,6 +365,8 @@ cpu_attach(struct device *parent, struct
 void
 cpu_init(struct cpu_info *ci)
 {
+   fpuinit(ci);
+
/* configure the CPU if needed */
if (ci->cpu_setup != NULL)
(*ci->cpu_setup)(ci);
@@ -509,7 +511,6 @@ cpu_hatch(void *v)
cpu_init_idt();
lapic_set_lvt();
gdt_init_cpu(ci);
-   fpuinit(ci);
 
lldt(0);
 
Index: sys/arch/amd64/amd64/fpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/fpu.c,v
retrieving revision 1.21
diff -u -p -r1.21 fpu.c
--- sys/arch/amd64/amd64/fpu.c  29 Sep 2010 15:11:31 -  1.21
+++ sys/arch/amd64/amd64/fpu.c  25 Dec 2010 19:38:48 -
@@ -95,6 +95,8 @@
 void fpudna(struct cpu_info *);
 static int x86fpflags_to_siginfo(u_int32_t);
 
+uint32_t   fpu_mxcsr_mask;
+
 /*
  * Init the FPU.
  */
@@ -103,6 +105,16 @@ fpuinit(struct cpu_info *ci)
 {
lcr0(rcr0() & ~(CR0_EM|CR0_TS));
fninit();
+   if (CPU_IS_PRIMARY(ci)) {
+   struct fxsave64 fx __attribute__((aligned(16)));
+
+   bzero(&fx, sizeof(fx));
+   fxsave(&fx);
+   if (fx.fx_mxcsr_mask)
+   fpu_mxcsr_mask = fx.fx_mxcsr_mask;
+   else
+   fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+   }
lcr0(rcr0() | (CR0_TS));
 }
 
Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.129
diff -u -p -r1.129 machdep.c
--- sys/arch/amd64/amd64/machdep.c  22 Nov 2010 21:07:16 -  1.129
+++ sys/arch/amd64/amd64/machdep.c  25 Dec 2010 19:38:49 -
@@ -658,9 +658,11 @@ sys_sigreturn(struct proc *p, void *v, r
fpusave_proc(p, 0);
 
if (ksc.sc_fpstate) {
-   if ((error = copyin(ksc.sc_fpstate,
-   &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, sizeof (struct 
fxsave64
+   struct fxsave64 *fx = &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave;
+
+   if ((error = copyin(ksc.sc_fpstate, fx, sizeof(*fx
return (error);
+   fx->fx_mxcsr &= fpu_mxcsr_mask;
p->p_md.md_flags |= MDP_USEDFPU;
}
 
Index: sys/arch/amd64/amd64/process_machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/process_machdep.c,v
retrieving revision 1.9
diff -u -p -r1.9 process_machdep.c
--- sys/arch/amd64/amd64/process_machdep.c  29 Sep 2010 15:11:31 -  
1.9
+++ sys/arch/amd64/amd64/process_machdep.c  25 Dec 2010 19:38:50 -
@@ -137,7 +137,7 @@ process_read_fpregs(struct proc *p, stru
frame->fx_fsw = 0x;
frame->fx_ftw = 0xff;
frame->fx_mxcsr = __INITIAL_MXCSR__;
-   frame->fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+   frame->fx_mxcsr_mask = fpu_mxcsr_mask;
p->p_md.md_flags |= MDP_USEDFPU;
}
 
@@ -198,6 +198,7 @@ process_write_fpregs(struct proc *p, str
}
 
memcpy(frame, ®s->fxstate, sizeof(*regs));
+   frame->fx_mxcsr &= fpu_mxcsr_mask;
return (0);
 }
 
Index: sys/arch/amd64/include/fpu.h
===
RCS file: /cvs/src/sys/arch/amd64/include/fpu.h,v
retrieving revision 1.7
diff -u -p -r1.7 fpu.h
--- sys/arch/amd64/include/fpu.h20 Nov 2010 20:11:17 -  1.7
+++ sys/arch/amd64/include/fpu.h25 Dec 2010 19:38:50 -
@@ -49,6 +49,8 @@ struct savefpu {
 struct trapframe;
 struct cpu_info;
 
+extern uint32_tfpu_mxcsr_mask;
+
 void fpuinit(struct cpu_info *);
 void fpudrop(void);
 void fpudiscard(struct proc *);
Index: sys/arch/i386/i386