Module Name:    src
Committed By:   dsl
Date:           Fri Feb  7 22:40:22 UTC 2014

Modified Files:
        src/sys/arch/amd64/amd64: fpu.c machdep.c netbsd32_machdep.c
            process_machdep.c
        src/sys/arch/amd64/conf: files.amd64
        src/sys/arch/amd64/include: fpu.h netbsd32_machdep.h pcb.h reg.h
        src/sys/arch/i386/conf: files.i386
        src/sys/arch/i386/i386: process_machdep.c
        src/sys/arch/i386/include: npx.h
        src/sys/arch/x86/include: cpu_extended_state.h
        src/sys/arch/xen/conf: files.xen
        src/sys/compat/linux/arch/amd64: linux_machdep.c linux_machdep.h
        src/sys/compat/linux32/arch/amd64: linux32_machdep.c
Added Files:
        src/sys/arch/x86/x86: convert_xmm_s87.c

Log Message:
Convert the amd64 build to use x86/cpu_extended_state.h so that the fpu
  definitions match those of i386.
Mostly just structure and field renames, in addition:
1) process_xmm_to_s87() and process_s87_to_xmm() moved into
   x86/convert_xmm_s87.c so they can be used by amd64's netbsd32 code.
2) The linux signal code simplified to use a structure copy for the fxsave
   data - it matches the hardware definition and won't change.


To generate a diff of this commit:
cvs rdiff -u -r1.45 -r1.46 src/sys/arch/amd64/amd64/fpu.c
cvs rdiff -u -r1.201 -r1.202 src/sys/arch/amd64/amd64/machdep.c
cvs rdiff -u -r1.88 -r1.89 src/sys/arch/amd64/amd64/netbsd32_machdep.c
cvs rdiff -u -r1.25 -r1.26 src/sys/arch/amd64/amd64/process_machdep.c
cvs rdiff -u -r1.80 -r1.81 src/sys/arch/amd64/conf/files.amd64
cvs rdiff -u -r1.11 -r1.12 src/sys/arch/amd64/include/fpu.h
cvs rdiff -u -r1.18 -r1.19 src/sys/arch/amd64/include/netbsd32_machdep.h
cvs rdiff -u -r1.22 -r1.23 src/sys/arch/amd64/include/pcb.h
cvs rdiff -u -r1.7 -r1.8 src/sys/arch/amd64/include/reg.h
cvs rdiff -u -r1.372 -r1.373 src/sys/arch/i386/conf/files.i386
cvs rdiff -u -r1.81 -r1.82 src/sys/arch/i386/i386/process_machdep.c
cvs rdiff -u -r1.34 -r1.35 src/sys/arch/i386/include/npx.h
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/x86/include/cpu_extended_state.h
cvs rdiff -u -r0 -r1.1 src/sys/arch/x86/x86/convert_xmm_s87.c
cvs rdiff -u -r1.130 -r1.131 src/sys/arch/xen/conf/files.xen
cvs rdiff -u -r1.44 -r1.45 src/sys/compat/linux/arch/amd64/linux_machdep.c
cvs rdiff -u -r1.13 -r1.14 src/sys/compat/linux/arch/amd64/linux_machdep.h
cvs rdiff -u -r1.33 -r1.34 \
    src/sys/compat/linux32/arch/amd64/linux32_machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/fpu.c
diff -u src/sys/arch/amd64/amd64/fpu.c:1.45 src/sys/arch/amd64/amd64/fpu.c:1.46
--- src/sys/arch/amd64/amd64/fpu.c:1.45	Tue Feb  4 21:09:23 2014
+++ src/sys/arch/amd64/amd64/fpu.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: fpu.c,v 1.45 2014/02/04 21:09:23 dsl Exp $	*/
+/*	$NetBSD: fpu.c,v 1.46 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
@@ -100,7 +100,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.45 2014/02/04 21:09:23 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.46 2014/02/07 22:40:22 dsl Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -179,7 +179,7 @@ fputrap(struct trapframe *frame)
 {
 	struct lwp *l = curlwp;
 	struct pcb *pcb = lwp_getpcb(l);
-	struct savefpu *sfp = &pcb->pcb_savefpu;
+	union savefpu *sfp = &pcb->pcb_savefpu;
 	uint32_t mxcsr, statbits;
 	ksiginfo_t ksi;
 
@@ -194,7 +194,7 @@ fputrap(struct trapframe *frame)
 	fxsave(sfp);
 
 	if (frame->tf_trapno == T_XMM) {
-		mxcsr = sfp->fp_fxsave.fx_mxcsr;
+		mxcsr = sfp->sv_xmm.fx_mxcsr;
 		statbits = mxcsr;
 		mxcsr &= ~0x3f;
 		x86_ldmxcsr(&mxcsr);
@@ -203,10 +203,10 @@ fputrap(struct trapframe *frame)
 
 		fninit();
 		fwait();
-		cw = sfp->fp_fxsave.fx_fcw;
+		cw = sfp->sv_xmm.fx_cw;
 		fldcw(&cw);
 		fwait();
-		statbits = sfp->fp_fxsave.fx_fsw;
+		statbits = sfp->sv_xmm.fx_sw;
 	}
 	KPREEMPT_ENABLE(l);
 
@@ -300,9 +300,9 @@ fpudna(struct cpu_info *ci)
 	pcb->pcb_fpcpu = ci;
 	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
 		fninit();
-		cw = pcb->pcb_savefpu.fp_fxsave.fx_fcw;
+		cw = pcb->pcb_savefpu.sv_xmm.fx_cw;
 		fldcw(&cw);
-		mxcsr = pcb->pcb_savefpu.fp_fxsave.fx_mxcsr;
+		mxcsr = pcb->pcb_savefpu.sv_xmm.fx_mxcsr;
 		x86_ldmxcsr(&mxcsr);
 		l->l_md.md_flags |= MDL_USEDFPU;
 	} else {

Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.201 src/sys/arch/amd64/amd64/machdep.c:1.202
--- src/sys/arch/amd64/amd64/machdep.c:1.201	Thu Jan  9 00:57:25 2014
+++ src/sys/arch/amd64/amd64/machdep.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.201 2014/01/09 00:57:25 dholland Exp $	*/
+/*	$NetBSD: machdep.c,v 1.202 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.201 2014/01/09 00:57:25 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.202 2014/02/07 22:40:22 dsl Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -1341,11 +1341,11 @@ setregs(struct lwp *l, struct exec_packa
 	l->l_md.md_flags &= ~MDL_USEDFPU;
 	pcb->pcb_flags = 0;
 	if (pack->ep_osversion >= 699002600)
-		pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
+		pcb->pcb_savefpu.sv_xmm.fx_cw = __NetBSD_NPXCW__;
 	else
-		pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_COMPAT_NPXCW__;
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+		pcb->pcb_savefpu.sv_xmm.fx_cw = __NetBSD_COMPAT_NPXCW__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
 
 	l->l_proc->p_flag &= ~PK_32;
 
@@ -1939,7 +1939,7 @@ cpu_getmcontext(struct lwp *l, mcontext_
 		if (pcb->pcb_fpcpu) {
 			fpusave_lwp(l, true);
 		}
-		memcpy(mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
+		memcpy(mcp->__fpregs, &pcb->pcb_savefpu.sv_xmm,
 		    sizeof (mcp->__fpregs));
 		*flags |= _UC_FPU;
 	}
@@ -1995,7 +1995,7 @@ cpu_setmcontext(struct lwp *l, const mco
 		fpusave_lwp(l, false);
 
 	if ((flags & _UC_FPU) != 0) {
-		memcpy(&pcb->pcb_savefpu.fp_fxsave, mcp->__fpregs,
+		memcpy(&pcb->pcb_savefpu.sv_xmm, mcp->__fpregs,
 		    sizeof (mcp->__fpregs));
 		l->l_md.md_flags |= MDL_USEDFPU;
 	}

Index: src/sys/arch/amd64/amd64/netbsd32_machdep.c
diff -u src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.88 src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.89
--- src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.88	Sat Jan 25 05:09:59 2014
+++ src/sys/arch/amd64/amd64/netbsd32_machdep.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.c,v 1.88 2014/01/25 05:09:59 christos Exp $	*/
+/*	$NetBSD: netbsd32_machdep.c,v 1.89 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.88 2014/01/25 05:09:59 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.89 2014/02/07 22:40:22 dsl Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -143,11 +143,11 @@ netbsd32_setregs(struct lwp *l, struct e
 	l->l_md.md_flags |= MDL_COMPAT32;	/* Force iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
 	if (pack->ep_osversion >= 699002600)
-		pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
+		pcb->pcb_savefpu.sv_xmm.fx_cw = __NetBSD_NPXCW__;
 	else
-		pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_COMPAT_NPXCW__;
-        pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;  
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+		pcb->pcb_savefpu.sv_xmm.fx_cw = __NetBSD_COMPAT_NPXCW__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;  
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
 
 	p->p_flag |= PK_32;
 
@@ -530,68 +530,12 @@ netbsd32_process_read_regs(struct lwp *l
 	return (0);
 }
 
-/*
- * XXX-cube (20060311):  This doesn't seem to work fine.
- */
-static int
-xmm_to_s87_tag(const uint8_t *fpac, int regno, uint8_t tw)
-{
-	static const uint8_t empty_significand[8] = { 0 };
-	int tag;
-	uint16_t exponent;
-
-	if (tw & (1U << regno)) {
-		exponent = fpac[8] | (fpac[9] << 8);
-		switch (exponent) {
-		case 0x7fff:
-			tag = 2;
-			break;
-
-		case 0x0000:
-			if (memcmp(empty_significand, fpac,
-				   sizeof(empty_significand)) == 0)
-				tag = 1;
-			else
-				tag = 2;
-			break;
-
-		default:
-			if ((fpac[7] & 0x80) == 0)
-				tag = 2;
-			else
-				tag = 0;
-			break;
-		}
-	} else
-		tag = 3;
-
-	return (tag);
-}
-
 int
 netbsd32_process_read_fpregs(struct lwp *l, struct fpreg32 *regs, size_t *sz)
 {
 	struct fpreg regs64;
-	struct save87 *s87 = (struct save87 *)regs;
+	int error;
 	size_t fp_size;
-	int error, i;
-
-	union fp_addr {
-	        uint64_t fa_64; /* Linear address for 64bit systems */
-	        struct {
-	                uint32_t fa_off;        /* Linear address for 32 bit */
-	                uint16_t fa_seg;        /* Code/data (etc) segment */
-	                uint16_t fa_pad;
-	        } fa_32; 
-	} fa;
-
-	/* 
-	 * NOTE: This 'struct fpreg32' is just char[108] and is shorter
-	 * than 'struct save87'.
-	 * If we write to the extra fields we trash the stack when writing
-	 * process coredumps (see coredump_note() in core_elf32.c).
-	 * This code must not set sv_env.en_tw or s87->sv_ex_sw.
-	 */
 
 	/*
 	 * All that stuff makes no sense in i386 code :(
@@ -601,26 +545,8 @@ netbsd32_process_read_fpregs(struct lwp 
 	error = process_read_fpregs(l, &regs64, &fp_size);
 	if (error)
 		return error;
-
-	s87->sv_env.en_cw = regs64.fxstate.fx_fcw;
-	s87->sv_env.en_sw = regs64.fxstate.fx_fsw;
-	fa.fa_64 = regs64.fxstate.fx_rip;
-	s87->sv_env.en_fip = fa.fa_32.fa_off;
-	s87->sv_env.en_fcs = fa.fa_32.fa_seg;
-	s87->sv_env.en_opcode = regs64.fxstate.fx_fop;
-	fa.fa_64 = regs64.fxstate.fx_rdp;
-	s87->sv_env.en_foo = fa.fa_32.fa_off;
-	s87->sv_env.en_fos = fa.fa_32.fa_seg;
-
-	s87->sv_env.en_tw = 0;
-	for (i = 0; i < 8; i++) {
-		s87->sv_env.en_tw |=
-		    (xmm_to_s87_tag((uint8_t *)&regs64.fxstate.fx_st[i][0], i,
-		     regs64.fxstate.fx_ftw) << (i * 2));
-
-		memcpy(&s87->sv_ac[i].fp_bytes, &regs64.fxstate.fx_st[i][0],
-		    sizeof(s87->sv_ac[i].fp_bytes));
-	}
+	__CTASSERT(sizeof *regs == sizeof (struct save87));
+	process_xmm_to_s87(&regs64.fxstate, (struct save87 *)regs);
 
 	return (0);
 }
@@ -897,8 +823,8 @@ cpu_setmcontext32(struct lwp *l, const m
 		if (pcb->pcb_fpcpu != NULL) {
 			fpusave_lwp(l, false);
 		}
-		memcpy(&pcb->pcb_savefpu.fp_fxsave, &mcp->__fpregs,
-		    sizeof (pcb->pcb_savefpu.fp_fxsave));
+		memcpy(&pcb->pcb_savefpu.sv_xmm, &mcp->__fpregs,
+		    sizeof (pcb->pcb_savefpu.sv_xmm));
 		/* If not set already. */
 		l->l_md.md_flags |= MDL_USEDFPU;
 	}
@@ -957,8 +883,8 @@ cpu_getmcontext32(struct lwp *l, mcontex
 		if (pcb->pcb_fpcpu) {
 			fpusave_lwp(l, true);
 		}
-		memcpy(&mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
-		    sizeof (pcb->pcb_savefpu.fp_fxsave));
+		memcpy(&mcp->__fpregs, &pcb->pcb_savefpu.sv_xmm,
+		    sizeof (pcb->pcb_savefpu.sv_xmm));
 		*flags |= _UC_FPU;
 	}
 }

Index: src/sys/arch/amd64/amd64/process_machdep.c
diff -u src/sys/arch/amd64/amd64/process_machdep.c:1.25 src/sys/arch/amd64/amd64/process_machdep.c:1.26
--- src/sys/arch/amd64/amd64/process_machdep.c:1.25	Sat Jan  4 00:10:02 2014
+++ src/sys/arch/amd64/amd64/process_machdep.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.25 2014/01/04 00:10:02 dsl Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.26 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -53,7 +53,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.25 2014/01/04 00:10:02 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.26 2014/02/07 22:40:22 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -69,7 +69,7 @@ __KERNEL_RCSID(0, "$NetBSD: process_mach
 #include <machine/fpu.h>
 
 static inline struct trapframe *process_frame(struct lwp *);
-static inline struct fxsave64 *process_fpframe(struct lwp *);
+static inline struct fxsave *process_fpframe(struct lwp *);
 #if 0
 static inline int verr_gdt(struct pmap *, int sel);
 static inline int verr_ldt(struct pmap *, int sel);
@@ -82,12 +82,12 @@ process_frame(struct lwp *l)
 	return (l->l_md.md_regs);
 }
 
-static inline struct fxsave64 *
+static inline struct fxsave *
 process_fpframe(struct lwp *l)
 {
 	struct pcb *pcb = lwp_getpcb(l);
 
-	return &pcb->pcb_savefpu.fp_fxsave;
+	return &pcb->pcb_savefpu.sv_xmm;
 }
 
 int
@@ -105,7 +105,7 @@ process_read_regs(struct lwp *l, struct 
 int
 process_read_fpregs(struct lwp *l, struct fpreg *regs,size_t *sz)
 {
-	struct fxsave64 *frame = process_fpframe(l);
+	struct fxsave *frame = process_fpframe(l);
 
 	if (l->l_md.md_flags & MDL_USEDFPU) {
 		fpusave_lwp(l, true);
@@ -118,13 +118,13 @@ process_read_fpregs(struct lwp *l, struc
 		 * The initial control word was already set by setregs(), so
 		 * save it temporarily.
 		 */
-		cw = frame->fx_fcw;
+		cw = frame->fx_cw;
 		mxcsr = frame->fx_mxcsr;
 		mxcsr_mask = frame->fx_mxcsr_mask;
 		memset(frame, 0, sizeof(*regs));
-		frame->fx_fcw = cw;
-		frame->fx_fsw = 0x0000;
-		frame->fx_ftw = 0x00;	/* abridged tag; all empty */
+		frame->fx_cw = cw;
+		frame->fx_sw = 0x0000;
+		frame->fx_tw = 0x00;	/* abridged tag; all empty */
 		frame->fx_mxcsr = mxcsr;
 		frame->fx_mxcsr_mask = mxcsr_mask;
 		l->l_md.md_flags |= MDL_USEDFPU;
@@ -160,7 +160,7 @@ process_write_regs(struct lwp *l, const 
 int
 process_write_fpregs(struct lwp *l, const struct fpreg *regs, size_t sz)
 {
-	struct fxsave64 *frame = process_fpframe(l);
+	struct fxsave *frame = process_fpframe(l);
 
 	if (l->l_md.md_flags & MDL_USEDFPU) {
 		fpusave_lwp(l, false);

Index: src/sys/arch/amd64/conf/files.amd64
diff -u src/sys/arch/amd64/conf/files.amd64:1.80 src/sys/arch/amd64/conf/files.amd64:1.81
--- src/sys/arch/amd64/conf/files.amd64:1.80	Wed Jul 17 21:26:28 2013
+++ src/sys/arch/amd64/conf/files.amd64	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-#	$NetBSD: files.amd64,v 1.80 2013/07/17 21:26:28 soren Exp $
+#	$NetBSD: files.amd64,v 1.81 2014/02/07 22:40:22 dsl Exp $
 #
 # new style config file for amd64 architecture
 #
@@ -133,6 +133,7 @@ attach	fd at fdc
 # NetBSD/i386 32-bit binary compatibility (COMPAT_NETBSD32)
 include "compat/netbsd32/files.netbsd32"
 file	arch/amd64/amd64/netbsd32_machdep.c	compat_netbsd32
+file	arch/x86/x86/convert_xmm_s87.c		compat_netbsd32
 file	arch/amd64/amd64/netbsd32_sigcode.S	compat_netbsd32 & compat_16
 file	arch/amd64/amd64/netbsd32_syscall.c	compat_netbsd32
 

Index: src/sys/arch/amd64/include/fpu.h
diff -u src/sys/arch/amd64/include/fpu.h:1.11 src/sys/arch/amd64/include/fpu.h:1.12
--- src/sys/arch/amd64/include/fpu.h:1.11	Wed Dec 11 22:06:51 2013
+++ src/sys/arch/amd64/include/fpu.h	Fri Feb  7 22:40:22 2014
@@ -1,70 +1,9 @@
-/*	$NetBSD: fpu.h,v 1.11 2013/12/11 22:06:51 dsl Exp $	*/
+/*	$NetBSD: fpu.h,v 1.12 2014/02/07 22:40:22 dsl Exp $	*/
 
 #ifndef	_AMD64_FPU_H_
 #define	_AMD64_FPU_H_
 
-/*
- * NetBSD/amd64 only uses the extended save/restore format used
- * by fxsave/fsrestore, to always deal with the SSE registers,
- * which are part of the ABI to pass floating point values.
- *
- * The memory used for the 'fsave' instruction must be 16 byte aligned,
- * but the definition here isn't aligned to avoid padding elsewhere.
- */
-
-struct fxsave64 {
-	uint16_t  fx_fcw;           /* 0: FPU control word */
-	uint16_t  fx_fsw;           /* 2: FPU status word */
-	uint8_t   fx_ftw;           /* 4: Abridged FPU tag word */
-	uint8_t   fx_reserved1;     /* 5: */
-	uint16_t  fx_fop;           /* 6: Low 11 bits are FPU opcode */
-	uint64_t  fx_rip;           /* 8: Address of faulting instruction */
-	uint64_t  fx_rdp;           /* 16: Data address associated with fault */
-	uint32_t  fx_mxcsr;         /* 24: SIMD control & status */
-	uint32_t  fx_mxcsr_mask;    /* 28: */
-	uint64_t  fx_st[8][2];      /* 32: 8 normal FP regs (80 bit) */
-	uint64_t  fx_xmm[16][2];    /* 160: 16 SSE2 registers */
-	uint8_t   fx_reserved2[48]; /* 416: */
-	uint8_t   fx_available[48]; /* 464: could be used by kernel */
-};
-
-__CTASSERT(sizeof (struct fxsave64) == 512);
-
-struct savefpu {
-	struct fxsave64 fp_fxsave;	/* see above */
-};
-
-/*
- * The i387 defaults to Intel extended precision mode and round to nearest,
- * with all exceptions masked.
- */
-#define	__INITIAL_NPXCW__	0x037f
-#define __INITIAL_MXCSR__ 	0x1f80
-#define __INITIAL_MXCSR_MASK__	0xffbf
-
-/* Modern NetBSD uses the default control word.. */
-#define	__NetBSD_NPXCW__	0x037f
-/* NetBSD before 6.99.26 forced IEEE double precision. */
-#define	__NetBSD_COMPAT_NPXCW__	0x127f
-/* Linux just uses the default control word. */
-#define	__Linux_NPXCW__		0x037f
-
-/*
- * The standard control word from finit is 0x37F, giving:
- *	round to nearest
- *	64-bit precision
- *	all exceptions masked.
- *
- * Now we want:
- *	affine mode (if we decide to support 287's)
- *	round to nearest
- *	53-bit precision
- *	all exceptions masked.
- *
- * 64-bit precision often gives bad results with high level languages
- * because it makes the results of calculations depend on whether
- * intermediate values are stored in memory or in FPU registers.
- */
+#include <x86/cpu_extended_state.h>
 
 #ifdef _KERNEL
 /*

Index: src/sys/arch/amd64/include/netbsd32_machdep.h
diff -u src/sys/arch/amd64/include/netbsd32_machdep.h:1.18 src/sys/arch/amd64/include/netbsd32_machdep.h:1.19
--- src/sys/arch/amd64/include/netbsd32_machdep.h:1.18	Sat Jan  4 00:10:02 2014
+++ src/sys/arch/amd64/include/netbsd32_machdep.h	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.h,v 1.18 2014/01/04 00:10:02 dsl Exp $	*/
+/*	$NetBSD: netbsd32_machdep.h,v 1.19 2014/02/07 22:40:22 dsl Exp $	*/
 
 #ifndef _MACHINE_NETBSD32_H_
 #define _MACHINE_NETBSD32_H_
@@ -133,33 +133,6 @@ struct x86_64_set_mtrr_args32 {
 	uint32_t n;
 };
 
-struct env87 {
-	int32_t		en_cw;
-	int32_t		en_sw;
-	int32_t		en_tw;
-	int32_t		en_fip;
-	uint16_t	en_fcs;
-	uint16_t	en_opcode;
-	int32_t		en_foo;
-	int32_t		en_fos;
-} __packed;
-
-struct fpacc87 {
-	uint8_t 	fp_bytes[10];
-} __packed;
-
-struct save87 {
-	struct env87	sv_env;
-	struct fpacc87	sv_ac[8];
-	/*
-	 * The fields below are not in the 'struct fpreg32' that is
-	 * otherwise the same as this structure (for coredumps).
-	 */
-	int32_t		sv_ex_sw;
-	int32_t		sv_ex_tw;
-	uint8_t		sv_pad[8 * 2 - 2 * 4];
-} __packed;
-
 #define NETBSD32_MID_MACHINE MID_I386
 
 int netbsd32_process_read_regs(struct lwp *, struct reg32 *);

Index: src/sys/arch/amd64/include/pcb.h
diff -u src/sys/arch/amd64/include/pcb.h:1.22 src/sys/arch/amd64/include/pcb.h:1.23
--- src/sys/arch/amd64/include/pcb.h:1.22	Sun Jan 19 10:30:19 2014
+++ src/sys/arch/amd64/include/pcb.h	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: pcb.h,v 1.22 2014/01/19 10:30:19 dsl Exp $	*/
+/*	$NetBSD: pcb.h,v 1.23 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -93,7 +93,7 @@ struct pcb {
 	uint64_t pcb_rbp;
 	uint64_t pcb_usersp;
 	uint32_t pcb_unused[2];		/* unused */
-	struct	savefpu pcb_savefpu __aligned(16); /* floating point state */
+	union	savefpu pcb_savefpu __aligned(16); /* floating point state */
 	uint32_t pcb_unused_1[4];	/* unused */
 	void     *pcb_onfault;		/* copyin/out fault recovery */
 	struct cpu_info *pcb_fpcpu;	/* cpu holding our fp state. */

Index: src/sys/arch/amd64/include/reg.h
diff -u src/sys/arch/amd64/include/reg.h:1.7 src/sys/arch/amd64/include/reg.h:1.8
--- src/sys/arch/amd64/include/reg.h:1.7	Sun Oct 26 00:08:15 2008
+++ src/sys/arch/amd64/include/reg.h	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: reg.h,v 1.7 2008/10/26 00:08:15 mrg Exp $	*/
+/*	$NetBSD: reg.h,v 1.8 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
@@ -43,12 +43,6 @@
 #include <machine/mcontext.h>
 
 /*
- * XXX
- * The #defines aren't used in the kernel, but some user-level code still
- * expects them.
- */
-
-/*
  * Registers accessible to ptrace(2) syscall for debugger use.
  * Same as mcontext.__gregs (except that is 'unsigned long').
  * NB this structure is no longer the same as 'struct trapframe',
@@ -59,20 +53,9 @@ struct reg {
 };
 
 struct fpreg {
-	struct fxsave64 fxstate;
+	struct fxsave fxstate;
 };
 
-#define fp_fcw		fxstate.fx_fcw
-#define fp_fsw		fxstate.fx_fsw
-#define fp_ftw		fxstate.fx_ftw
-#define fp_fop		fxstate.fx_fop
-#define fp_rip		fxstate.fx_rip
-#define fp_rdp		fxstate.fx_rdp
-#define fp_mxcsr	fxstate.fx_mxcsr
-#define fp_mxcsr_mask	fxstate.fx_mxcsr_mask
-#define fp_st		fxstate.fx_st
-#define fp_xmm		fxstate.fx_xmm
-
 #else	/*	__x86_64__	*/
 
 #include <i386/reg.h>

Index: src/sys/arch/i386/conf/files.i386
diff -u src/sys/arch/i386/conf/files.i386:1.372 src/sys/arch/i386/conf/files.i386:1.373
--- src/sys/arch/i386/conf/files.i386:1.372	Sun Jan 26 19:16:16 2014
+++ src/sys/arch/i386/conf/files.i386	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-#	$NetBSD: files.i386,v 1.372 2014/01/26 19:16:16 dsl Exp $
+#	$NetBSD: files.i386,v 1.373 2014/02/07 22:40:22 dsl Exp $
 #
 # new style config file for i386 architecture
 #
@@ -79,6 +79,7 @@ file	arch/i386/i386/machdep.c
 file 	arch/i386/i386/longrun.c
 file	arch/i386/i386/mtrr_k6.c	mtrr
 file	arch/i386/i386/process_machdep.c
+file	arch/x86/x86/convert_xmm_s87.c
 file	arch/i386/i386/trap.c
 file	dev/cons.c
 file	arch/i386/isa/npx.c

Index: src/sys/arch/i386/i386/process_machdep.c
diff -u src/sys/arch/i386/i386/process_machdep.c:1.81 src/sys/arch/i386/i386/process_machdep.c:1.82
--- src/sys/arch/i386/i386/process_machdep.c:1.81	Tue Feb  4 22:48:26 2014
+++ src/sys/arch/i386/i386/process_machdep.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: process_machdep.c,v 1.81 2014/02/04 22:48:26 dsl Exp $	*/
+/*	$NetBSD: process_machdep.c,v 1.82 2014/02/07 22:40:22 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.81 2014/02/04 22:48:26 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: process_machdep.c,v 1.82 2014/02/07 22:40:22 dsl Exp $");
 
 #include "opt_vm86.h"
 #include "opt_ptrace.h"
@@ -90,123 +90,6 @@ process_fpframe(struct lwp *l)
 	return &pcb->pcb_savefpu;
 }
 
-void
-process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
-{
-	unsigned int tag, ab_tag;
-	const struct fpaccfx *fx_reg;
-	struct fpacc87 *s87_reg;
-	int i;
-
-	/*
-	 * For historic reasons core dumps and ptrace all use the old save87
-	 * layout.  Convert the important parts.
-	 * getucontext gets what we give it.
-	 * setucontext should return something given by getucontext, but
-	 * we are (at the moment) willing to change it.
-	 *
-	 * It really isn't worth setting the 'tag' bits to 01 (zero) or
-	 * 10 (NaN etc) since the processor will set any internal bits
-	 * correctly when the value is loaded (the 287 believed them).
-	 *
-	 * Additionally the s87_tw and s87_tw are 'indexed' by the actual
-	 * register numbers, whereas the registers themselves have ST(0)
-	 * first. Pairing the values and tags can only be done with
-	 * reference to the 'top of stack'.
-	 *
-	 * If any x87 registers are used, they will typically be from
-	 * r7 downwards - so the high bits of the tag register indicate
-	 * used registers. The conversions are not optimised for this.
-	 *
-	 * The ABI we use requires the FP stack to be empty on every
-	 * function call. I think this means that the stack isn't expected
-	 * to overflow - overflow doesn't drop a core in my testing.
-	 *
-	 * Note that this code writes to all of the 's87' structure that
-	 * actually gets written to userspace.
-	 */
-
-	/* FPU control/status */
-	s87->s87_cw = sxmm->fx_cw;
-	s87->s87_sw = sxmm->fx_sw;
-	/* tag word handled below */
-	s87->s87_ip = sxmm->fx_ip;
-	s87->s87_opcode = sxmm->fx_opcode;
-	s87->s87_dp = sxmm->fx_dp;
-
-	/* FP registers (in stack order) */
-	fx_reg = sxmm->fx_87_ac;
-	s87_reg = s87->s87_ac;
-	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
-		*s87_reg = fx_reg->r;
-
-	/* Tag word and registers. */
-	ab_tag = sxmm->fx_tw & 0xff;	/* Bits set if valid */
-	if (ab_tag == 0) {
-		/* none used */
-		s87->s87_tw = 0xffff;
-		return;
-	}
-
-	tag = 0;
-	/* Separate bits of abridged tag word with zeros */
-	for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
-		tag |= ab_tag & i;
-	/* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
-	s87->s87_tw = (tag | tag >> 1) ^ 0xffff;
-}
-
-void
-process_s87_to_xmm(const struct save87 *s87, struct fxsave *sxmm)
-{
-	unsigned int tag, ab_tag;
-	struct fpaccfx *fx_reg;
-	const struct fpacc87 *s87_reg;
-	int i;
-
-	/*
-	 * ptrace gives us registers in the save87 format and
-	 * we must convert them to the correct format.
-	 *
-	 * This code is normally used when overwriting the processes
-	 * registers (in the pcb), so it musn't change any other fields.
-	 *
-	 * There is a lot of pad in 'struct fxsave', if the destination
-	 * is written to userspace, it must be zeroed first.
-	 */
-
-	/* FPU control/status */
-	sxmm->fx_cw = s87->s87_cw;
-	sxmm->fx_sw = s87->s87_sw;
-	/* tag word handled below */
-	sxmm->fx_ip = s87->s87_ip;
-	sxmm->fx_opcode = s87->s87_opcode;
-	sxmm->fx_dp = s87->s87_dp;
-
-	/* Tag word */
-	tag = s87->s87_tw & 0xffff;	/* 0b11 => unused */
-	if (tag == 0xffff) {
-		/* All unused - values don't matter */
-		sxmm->fx_tw = 0;
-		return;
-	}
-
-	tag ^= 0xffff;		/* So 0b00 is unused */
-	tag |= tag >> 1;	/* Look at even bits */
-	ab_tag = 0;
-	i = 1;
-	do
-		ab_tag |= tag & i;
-	while ((tag >>= 1) >= (i <<= 1));
-	sxmm->fx_tw = ab_tag;
-
-	/* FP registers (in stack order) */
-	fx_reg = sxmm->fx_87_ac;
-	s87_reg = s87->s87_ac;
-	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
-		fx_reg->r = *s87_reg;
-}
-
 int
 process_read_regs(struct lwp *l, struct reg *regs)
 {

Index: src/sys/arch/i386/include/npx.h
diff -u src/sys/arch/i386/include/npx.h:1.34 src/sys/arch/i386/include/npx.h:1.35
--- src/sys/arch/i386/include/npx.h:1.34	Fri Feb  7 19:36:15 2014
+++ src/sys/arch/i386/include/npx.h	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: npx.h,v 1.34 2014/02/07 19:36:15 dsl Exp $	*/
+/*	$NetBSD: npx.h,v 1.35 2014/02/07 22:40:22 dsl Exp $	*/
 
 #ifndef	_I386_NPX_H_
 #define	_I386_NPX_H_
@@ -9,8 +9,6 @@
 
 int	npx586bug1(int, int);
 void 	fpuinit(struct cpu_info *);
-void	process_xmm_to_s87(const struct fxsave *, struct save87 *);
-void	process_s87_to_xmm(const struct save87 *, struct fxsave *);
 struct lwp;
 int	npxtrap(struct lwp *);
 

Index: src/sys/arch/x86/include/cpu_extended_state.h
diff -u src/sys/arch/x86/include/cpu_extended_state.h:1.1 src/sys/arch/x86/include/cpu_extended_state.h:1.2
--- src/sys/arch/x86/include/cpu_extended_state.h:1.1	Fri Feb  7 19:36:15 2014
+++ src/sys/arch/x86/include/cpu_extended_state.h	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu_extended_state.h,v 1.1 2014/02/07 19:36:15 dsl Exp $	*/
+/*	$NetBSD: cpu_extended_state.h,v 1.2 2014/02/07 22:40:22 dsl Exp $	*/
 
 #ifndef _X86_CPU_EXTENDED_STATE_H_
 #define _X86_CPU_EXTENDED_STATE_H_
@@ -118,7 +118,7 @@ __CTASSERT_NOLINT(sizeof (struct fxsave)
 
 /* The end of the fsave buffer can be used by the operating system */
 struct fxsave_os {
-	uint8_t	fxo_fxsave[offsetof(struct fxsave, fx_kernel)];
+	uint8_t	fxo_fxsave[512 - 48];
 	/* 48 bytes available */
 };
 
@@ -237,6 +237,12 @@ __CTASSERT(sizeof (struct xsave_ymm) == 
  * Bits 16-31 must be zero.
  */
 #define	__INITIAL_MXCSR__	0x1f80
+#define	__INITIAL_MXCSR_MASK__	0xffbf
+
+#ifdef _KERNEL
+void process_xmm_to_s87(const struct fxsave *, struct save87 *);
+void process_s87_to_xmm(const struct save87 *, struct fxsave *);
+#endif
 
 
 #endif /* _X86_CPU_EXTENDED_STATE_H_ */

Index: src/sys/arch/xen/conf/files.xen
diff -u src/sys/arch/xen/conf/files.xen:1.130 src/sys/arch/xen/conf/files.xen:1.131
--- src/sys/arch/xen/conf/files.xen:1.130	Sun Jan 26 19:16:17 2014
+++ src/sys/arch/xen/conf/files.xen	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-#	$NetBSD: files.xen,v 1.130 2014/01/26 19:16:17 dsl Exp $
+#	$NetBSD: files.xen,v 1.131 2014/02/07 22:40:22 dsl Exp $
 #	NetBSD: files.x86,v 1.10 2003/10/08 17:30:00 bouyer Exp 
 #	NetBSD: files.i386,v 1.254 2004/03/25 23:32:10 jmc Exp 
 
@@ -84,6 +84,7 @@ file	arch/amd64/amd64/lock_stubs.S
 endif
 
 file	kern/subr_disk_mbr.c		disk
+file	arch/x86/x86/convert_xmm_s87.c
 file	arch/x86/x86/db_memrw.c		ddb | kgdb
 file	arch/x86/x86/db_trace.c		ddb
 file	arch/xen/x86/hypervisor_machdep.c

Index: src/sys/compat/linux/arch/amd64/linux_machdep.c
diff -u src/sys/compat/linux/arch/amd64/linux_machdep.c:1.44 src/sys/compat/linux/arch/amd64/linux_machdep.c:1.45
--- src/sys/compat/linux/arch/amd64/linux_machdep.c:1.44	Sat Jan  4 00:10:03 2014
+++ src/sys/compat/linux/arch/amd64/linux_machdep.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.44 2014/01/04 00:10:03 dsl Exp $ */
+/*	$NetBSD: linux_machdep.c,v 1.45 2014/02/07 22:40:22 dsl Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
@@ -33,7 +33,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.44 2014/01/04 00:10:03 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.45 2014/02/07 22:40:22 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -90,9 +90,9 @@ linux_setregs(struct lwp *l, struct exec
 
 	l->l_md.md_flags &= ~MDL_USEDFPU;
 	pcb->pcb_flags = 0;
-	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+	pcb->pcb_savefpu.sv_xmm.fx_cw = __NetBSD_NPXCW__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
 
 	l->l_proc->p_flag &= ~PK_32;
 
@@ -134,7 +134,7 @@ linux_sendsig(const ksiginfo_t *ksi, con
 	int onstack, error;
 	int sig = ksi->ksi_signo;
 	struct linux_rt_sigframe *sfp, sigframe;
-	struct linux__fpstate *fpsp, fpstate;
+	struct linux__fpstate *fpsp;
 	struct fpreg fpregs;
 	struct trapframe *tf = l->l_md.md_regs;
 	sig_t catcher = SIGACTION(p, sig).sa_handler;
@@ -158,7 +158,7 @@ linux_sendsig(const ksiginfo_t *ksi, con
 	 */
 	if (l->l_md.md_flags & MDL_USEDFPU) {
 		sp = (char *)
-		    (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL);
+		    (((long)sp - sizeof (*fpsp)) & ~0xfUL);
 		fpsp = (struct linux__fpstate *)sp;
 	} else
 		fpsp = NULL;
@@ -230,21 +230,9 @@ linux_sendsig(const ksiginfo_t *ksi, con
 	 */
 	if (fpsp != NULL) {
 		size_t fp_size = sizeof fpregs;
+		/* The netbsd and linux structures both match the fxsave data */
 		(void)process_read_fpregs(l, &fpregs, &fp_size);
-		memset(&fpstate, 0, sizeof(fpstate));
-		fpstate.cwd = fpregs.fp_fcw;
-		fpstate.swd = fpregs.fp_fsw;
-		fpstate.twd = fpregs.fp_ftw;
-		fpstate.fop = fpregs.fp_fop;
-		fpstate.rip = fpregs.fp_rip;
-		fpstate.rdp = fpregs.fp_rdp;
-		fpstate.mxcsr = fpregs.fp_mxcsr;
-		fpstate.mxcsr_mask = fpregs.fp_mxcsr_mask;
-		memcpy(&fpstate.st_space, &fpregs.fp_st, 
-		    sizeof(fpstate.st_space));
-		memcpy(&fpstate.xmm_space, &fpregs.fp_xmm, 
-		    sizeof(fpstate.xmm_space));
-		error = copyout(&fpstate, fpsp, sizeof(fpstate));
+		error = copyout(&fpregs, fpsp, sizeof(*fpsp));
 	}
 
 	if (error == 0)
@@ -327,11 +315,10 @@ linux_sys_rt_sigreturn(struct lwp *l, co
 	struct linux_ucontext *luctx;
 	struct trapframe *tf = l->l_md.md_regs;
 	struct linux_sigcontext *lsigctx;
-	struct linux__fpstate fpstate;
 	struct linux_rt_sigframe frame, *fp;
 	ucontext_t uctx;
 	mcontext_t *mctx;
-	struct fxsave64 *fxarea;
+	struct fxsave *fxarea;
 	int error;
 
 	fp = (struct linux_rt_sigframe *)(tf->tf_rsp - 8);
@@ -345,7 +332,7 @@ linux_sys_rt_sigreturn(struct lwp *l, co
 
 	memset(&uctx, 0, sizeof(uctx));
 	mctx = (mcontext_t *)&uctx.uc_mcontext;
-	fxarea = (struct fxsave64 *)&mctx->__fpregs;
+	fxarea = (struct fxsave *)&mctx->__fpregs;
 
 	/* 
 	 * Set the flags. Linux always have CPU, stack and signal state,
@@ -395,25 +382,13 @@ linux_sys_rt_sigreturn(struct lwp *l, co
 	 * FPU state 
 	 */
 	if (lsigctx->fpstate != NULL) {
-		error = copyin(lsigctx->fpstate, &fpstate, sizeof(fpstate));
+		/* Both structures match the fxsave data */
+		error = copyin(lsigctx->fpstate, fxarea, sizeof(*fxarea));
 		if (error != 0) {
 			mutex_enter(l->l_proc->p_lock);
 			sigexit(l, SIGILL);
 			return error;
 		}
-
-		fxarea->fx_fcw = fpstate.cwd;
-		fxarea->fx_fsw = fpstate.swd;
-		fxarea->fx_ftw = fpstate.twd;
-		fxarea->fx_fop = fpstate.fop;
-		fxarea->fx_rip = fpstate.rip;
-		fxarea->fx_rdp = fpstate.rdp;
-		fxarea->fx_mxcsr = fpstate.mxcsr;
-		fxarea->fx_mxcsr_mask = fpstate.mxcsr_mask;
-		memcpy(&fxarea->fx_st, &fpstate.st_space, 
-		    sizeof(fxarea->fx_st));
-		memcpy(&fxarea->fx_xmm, &fpstate.xmm_space, 
-		    sizeof(fxarea->fx_xmm));
 	}
 
 	/*

Index: src/sys/compat/linux/arch/amd64/linux_machdep.h
diff -u src/sys/compat/linux/arch/amd64/linux_machdep.h:1.13 src/sys/compat/linux/arch/amd64/linux_machdep.h:1.14
--- src/sys/compat/linux/arch/amd64/linux_machdep.h:1.13	Wed Jul  7 01:30:33 2010
+++ src/sys/compat/linux/arch/amd64/linux_machdep.h	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.h,v 1.13 2010/07/07 01:30:33 chs Exp $ */
+/*	$NetBSD: linux_machdep.h,v 1.14 2014/02/07 22:40:22 dsl Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
@@ -41,6 +41,7 @@
 #include <compat/linux/common/linux_siginfo.h>
 
 /* From <asm/sigcontext.h> */
+/* Matches the cpu's fxsave format */
 struct linux__fpstate {
 	u_int16_t cwd;
 	u_int16_t swd;
@@ -54,6 +55,7 @@ struct linux__fpstate {
 	u_int32_t xmm_space[64];
 	u_int32_t reserved2[24];
 };
+__CTASSERT(sizeof (struct linux__fpstate) == 512);
 
 /* From <asm/sigcontext.h> */
 struct linux_sigcontext {

Index: src/sys/compat/linux32/arch/amd64/linux32_machdep.c
diff -u src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.33 src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.34
--- src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.33	Sun Dec  1 01:05:16 2013
+++ src/sys/compat/linux32/arch/amd64/linux32_machdep.c	Fri Feb  7 22:40:22 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux32_machdep.c,v 1.33 2013/12/01 01:05:16 christos Exp $ */
+/*	$NetBSD: linux32_machdep.c,v 1.34 2014/02/07 22:40:22 dsl Exp $ */
 
 /*-
  * Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@@ -31,7 +31,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.33 2013/12/01 01:05:16 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.34 2014/02/07 22:40:22 dsl Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -286,9 +286,9 @@ linux32_setregs(struct lwp *l, struct ex
 	l->l_md.md_flags &= ~MDL_USEDFPU;
 	l->l_md.md_flags |= MDL_COMPAT32;	/* Forces iret not sysret */
 	pcb->pcb_flags = PCB_COMPAT32;
-	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __Linux_NPXCW__;
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
-	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
+	pcb->pcb_savefpu.sv_xmm.fx_cw = __Linux_NPXCW__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
+	pcb->pcb_savefpu.sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
 
 	p->p_flag |= PK_32;
 

Added files:

Index: src/sys/arch/x86/x86/convert_xmm_s87.c
diff -u /dev/null src/sys/arch/x86/x86/convert_xmm_s87.c:1.1
--- /dev/null	Fri Feb  7 22:40:23 2014
+++ src/sys/arch/x86/x86/convert_xmm_s87.c	Fri Feb  7 22:40:22 2014
@@ -0,0 +1,155 @@
+/*	$NetBSD: convert_xmm_s87.c,v 1.1 2014/02/07 22:40:22 dsl Exp $	*/
+
+/*-
+ * Copyright (c) 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum; by Jason R. Thorpe of Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: convert_xmm_s87.c,v 1.1 2014/02/07 22:40:22 dsl Exp $");
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <x86/cpu_extended_state.h>
+
+void
+process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
+{
+	unsigned int tag, ab_tag;
+	const struct fpaccfx *fx_reg;
+	struct fpacc87 *s87_reg;
+	int i;
+
+	/*
+	 * For historic reasons core dumps and ptrace all use the old save87
+	 * layout.  Convert the important parts.
+	 * getucontext gets what we give it.
+	 * setucontext should return something given by getucontext, but
+	 * we are (at the moment) willing to change it.
+	 *
+	 * It really isn't worth setting the 'tag' bits to 01 (zero) or
+	 * 10 (NaN etc) since the processor will set any internal bits
+	 * correctly when the value is loaded (the 287 believed them).
+	 *
+	 * Additionally the s87_tw and fx_tw are 'indexed' by the actual
+	 * register numbers, whereas the registers themselves have ST(0)
+	 * first. Pairing the values and tags can only be done with
+	 * reference to the 'top of stack'.
+	 *
+	 * If any x87 registers are used, they will typically be from
+	 * r7 downwards - so the high bits of the tag register indicate
+	 * used registers. The conversions are not optimised for this.
+	 *
+	 * The ABI we use requires the FP stack to be empty on every
+	 * function call. I think this means that the stack isn't expected
+	 * to overflow - overflow doesn't drop a core in my testing.
+	 *
+	 * Note that this code writes to all of the 's87' structure that
+	 * actually gets written to userspace.
+	 */
+
+	/* FPU control/status */
+	s87->s87_cw = sxmm->fx_cw;
+	s87->s87_sw = sxmm->fx_sw;
+	/* tag word handled below */
+	s87->s87_ip = sxmm->fx_ip;
+	s87->s87_opcode = sxmm->fx_opcode;
+	s87->s87_dp = sxmm->fx_dp;
+
+	/* FP registers (in stack order) */
+	fx_reg = sxmm->fx_87_ac;
+	s87_reg = s87->s87_ac;
+	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
+		*s87_reg = fx_reg->r;
+
+	/* Tag word and registers. */
+	ab_tag = sxmm->fx_tw & 0xff;	/* Bits set if valid */
+	if (ab_tag == 0) {
+		/* No registers in use: mark all eight tags 0b11 */
+		s87->s87_tw = 0xffff;
+		return;
+	}
+
+	tag = 0;
+	/* Separate bits of abridged tag word with zeros: bit n => bit 2n+1 */
+	for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
+		tag |= ab_tag & i;
+	/* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
+	s87->s87_tw = (tag | tag >> 1) ^ 0xffff;
+}
+
+void
+process_s87_to_xmm(const struct save87 *s87, struct fxsave *sxmm)
+{
+	unsigned int tag, ab_tag;
+	struct fpaccfx *fx_reg;
+	const struct fpacc87 *s87_reg;
+	int i;
+
+	/*
+	 * ptrace gives us registers in the save87 format and
+	 * we must convert them to the correct format.
+	 *
+	 * This code is normally used when overwriting the process's
+	 * registers (in the pcb), so it mustn't change any other fields.
+	 *
+	 * There is a lot of pad in 'struct fxsave'; if the destination
+	 * is written to userspace, it must be zeroed first.
+	 */
+
+	/* FPU control/status */
+	sxmm->fx_cw = s87->s87_cw;
+	sxmm->fx_sw = s87->s87_sw;
+	/* tag word handled below */
+	sxmm->fx_ip = s87->s87_ip;
+	sxmm->fx_opcode = s87->s87_opcode;
+	sxmm->fx_dp = s87->s87_dp;
+
+	/* Tag word: reduce each 2-bit s87 tag to one bit of fx_tw */
+	tag = s87->s87_tw & 0xffff;	/* 0b11 => unused */
+	if (tag == 0xffff) {
+		/* All unused - register values don't matter, skip the copy */
+		sxmm->fx_tw = 0;
+		return;
+	}
+
+	tag ^= 0xffff;		/* So 0b00 is unused */
+	tag |= tag >> 1;	/* Even bit of a pair set if used */
+	ab_tag = 0;
+	i = 1;
+	do
+		ab_tag |= tag & i;
+	while ((tag >>= 1) >= (i <<= 1));
+	sxmm->fx_tw = ab_tag;
+
+	/* FP registers (in stack order) */
+	fx_reg = sxmm->fx_87_ac;
+	s87_reg = s87->s87_ac;
+	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
+		fx_reg->r = *s87_reg;
+}

Reply via email to