Author: alc
Date: Thu Mar 29 06:01:11 2012
New Revision: 233649
URL: http://svn.freebsd.org/changeset/base/233649

Log:
  MFC r233291
    Handle spurious page faults that may occur in no-fault sections of the
    kernel.

Modified:
  stable/8/sys/amd64/amd64/trap.c
  stable/8/sys/amd64/include/proc.h
  stable/8/sys/i386/i386/trap.c
  stable/8/sys/i386/include/proc.h
  stable/8/sys/kern/kern_subr.c
  stable/8/sys/kern/kern_sysctl.c
  stable/8/sys/sys/proc.h
  stable/8/sys/vm/vm_fault.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/boot/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/e1000/   (props changed)
  stable/8/sys/i386/conf/XENHVM   (props changed)

Modified: stable/8/sys/amd64/amd64/trap.c
==============================================================================
--- stable/8/sys/amd64/amd64/trap.c     Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/amd64/amd64/trap.c     Thu Mar 29 06:01:11 2012        (r233649)
@@ -305,26 +305,6 @@ trap(struct trapframe *frame)
        }
 
        code = frame->tf_err;
-       if (type == T_PAGEFLT) {
-               /*
-                * If we get a page fault while in a critical section, then
-                * it is most likely a fatal kernel page fault.  The kernel
-                * is already going to panic trying to get a sleep lock to
-                * do the VM lookup, so just consider it a fatal trap so the
-                * kernel can print out a useful trap message and even get
-                * to the debugger.
-                *
-                * If we get a page fault while holding a non-sleepable
-                * lock, then it is most likely a fatal kernel page fault.
-                * If WITNESS is enabled, then it's going to whine about
-                * bogus LORs with various VM locks, so just skip to the
-                * fatal trap handling directly.
-                */
-               if (td->td_critnest != 0 ||
-                   WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
-                   "Kernel page fault") != 0)
-                       trap_fatal(frame, frame->tf_addr);
-       }
 
         if (ISPL(frame->tf_cs) == SEL_UPL) {
                /* user trap */
@@ -657,6 +637,50 @@ trap_pfault(frame, usermode)
        struct proc *p = td->td_proc;
        vm_offset_t eva = frame->tf_addr;
 
+       if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+               /*
+                * Due to both processor errata and lazy TLB invalidation when
+                * access restrictions are removed from virtual pages, memory
+                * accesses that are allowed by the physical mapping layer may
+                * nonetheless cause one spurious page fault per virtual page. 
+                * When the thread is executing a "no faulting" section that
+                * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+                * every page fault is treated as a spurious page fault,
+                * unless it accesses the same virtual address as the most
+                * recent page fault within the same "no faulting" section.
+                */
+               if (td->td_md.md_spurflt_addr != eva ||
+                   (td->td_pflags & TDP_RESETSPUR) != 0) {
+                       /*
+                        * Do nothing to the TLB.  A stale TLB entry is
+                        * flushed automatically by a page fault.
+                        */
+                       td->td_md.md_spurflt_addr = eva;
+                       td->td_pflags &= ~TDP_RESETSPUR;
+                       return (0);
+               }
+       } else {
+               /*
+                * If we get a page fault while in a critical section, then
+                * it is most likely a fatal kernel page fault.  The kernel
+                * is already going to panic trying to get a sleep lock to
+                * do the VM lookup, so just consider it a fatal trap so the
+                * kernel can print out a useful trap message and even get
+                * to the debugger.
+                *
+                * If we get a page fault while holding a non-sleepable
+                * lock, then it is most likely a fatal kernel page fault.
+                * If WITNESS is enabled, then it's going to whine about
+                * bogus LORs with various VM locks, so just skip to the
+                * fatal trap handling directly.
+                */
+               if (td->td_critnest != 0 ||
+                   WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+                   "Kernel page fault") != 0) {
+                       trap_fatal(frame, eva);
+                       return (-1);
+               }
+       }
        va = trunc_page(eva);
        if (va >= VM_MIN_KERNEL_ADDRESS) {
                /*

Modified: stable/8/sys/amd64/include/proc.h
==============================================================================
--- stable/8/sys/amd64/include/proc.h   Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/amd64/include/proc.h   Thu Mar 29 06:01:11 2012        (r233649)
@@ -46,6 +46,7 @@ struct proc_ldt {
 struct mdthread {
        int     md_spinlock_count;      /* (k) */
        register_t md_saved_flags;      /* (k) */
+       register_t md_spurflt_addr;     /* (k) Spurious page fault address. */
 };
 
 struct mdproc {

Modified: stable/8/sys/i386/i386/trap.c
==============================================================================
--- stable/8/sys/i386/i386/trap.c       Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/i386/i386/trap.c       Thu Mar 29 06:01:11 2012        (r233649)
@@ -333,28 +333,13 @@ trap(struct trapframe *frame)
                 * For some Cyrix CPUs, %cr2 is clobbered by
                 * interrupts.  This problem is worked around by using
                 * an interrupt gate for the pagefault handler.  We
-                * are finally ready to read %cr2 and then must
-                * reenable interrupts.
-                *
-                * If we get a page fault while in a critical section, then
-                * it is most likely a fatal kernel page fault.  The kernel
-                * is already going to panic trying to get a sleep lock to
-                * do the VM lookup, so just consider it a fatal trap so the
-                * kernel can print out a useful trap message and even get
-                * to the debugger.
-                *
-                * If we get a page fault while holding a non-sleepable
-                * lock, then it is most likely a fatal kernel page fault.
-                * If WITNESS is enabled, then it's going to whine about
-                * bogus LORs with various VM locks, so just skip to the
-                * fatal trap handling directly.
+                * are finally ready to read %cr2 and conditionally
+                * reenable interrupts.  If we hold a spin lock, then
+                * we must not reenable interrupts.  This might be a
+                * spurious page fault.
                 */
                eva = rcr2();
-               if (td->td_critnest != 0 ||
-                   WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
-                   "Kernel page fault") != 0)
-                       trap_fatal(frame, eva);
-               else
+               if (td->td_md.md_spinlock_count == 0)
                        enable_intr();
        }
 
@@ -807,6 +792,50 @@ trap_pfault(frame, usermode, eva)
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
 
+       if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+               /*
+                * Due to both processor errata and lazy TLB invalidation when
+                * access restrictions are removed from virtual pages, memory
+                * accesses that are allowed by the physical mapping layer may
+                * nonetheless cause one spurious page fault per virtual page. 
+                * When the thread is executing a "no faulting" section that
+                * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+                * every page fault is treated as a spurious page fault,
+                * unless it accesses the same virtual address as the most
+                * recent page fault within the same "no faulting" section.
+                */
+               if (td->td_md.md_spurflt_addr != eva ||
+                   (td->td_pflags & TDP_RESETSPUR) != 0) {
+                       /*
+                        * Do nothing to the TLB.  A stale TLB entry is
+                        * flushed automatically by a page fault.
+                        */
+                       td->td_md.md_spurflt_addr = eva;
+                       td->td_pflags &= ~TDP_RESETSPUR;
+                       return (0);
+               }
+       } else {
+               /*
+                * If we get a page fault while in a critical section, then
+                * it is most likely a fatal kernel page fault.  The kernel
+                * is already going to panic trying to get a sleep lock to
+                * do the VM lookup, so just consider it a fatal trap so the
+                * kernel can print out a useful trap message and even get
+                * to the debugger.
+                *
+                * If we get a page fault while holding a non-sleepable
+                * lock, then it is most likely a fatal kernel page fault.
+                * If WITNESS is enabled, then it's going to whine about
+                * bogus LORs with various VM locks, so just skip to the
+                * fatal trap handling directly.
+                */
+               if (td->td_critnest != 0 ||
+                   WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+                   "Kernel page fault") != 0) {
+                       trap_fatal(frame, eva);
+                       return (-1);
+               }
+       }
        va = trunc_page(eva);
        if (va >= KERNBASE) {
                /*

Modified: stable/8/sys/i386/include/proc.h
==============================================================================
--- stable/8/sys/i386/include/proc.h    Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/i386/include/proc.h    Thu Mar 29 06:01:11 2012        (r233649)
@@ -51,6 +51,7 @@ struct proc_ldt {
 struct mdthread {
        int     md_spinlock_count;      /* (k) */
        register_t md_saved_flags;      /* (k) */
+       register_t md_spurflt_addr;     /* (k) Spurious page fault address. */
 };
 
 struct mdproc {

Modified: stable/8/sys/kern/kern_subr.c
==============================================================================
--- stable/8/sys/kern/kern_subr.c       Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/kern/kern_subr.c       Thu Mar 29 06:01:11 2012        (r233649)
@@ -190,8 +190,12 @@ uiomove_faultflag(void *cp, int n, struc
 
        /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
        newflags = TDP_DEADLKTREAT;
-       if (uio->uio_segflg == UIO_USERSPACE && nofault)
-               newflags |= TDP_NOFAULTING;
+       if (uio->uio_segflg == UIO_USERSPACE && nofault) {
+               /*
+                * Fail if a non-spurious page fault occurs.
+                */
+               newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
+       }
        save = curthread_pflags_set(newflags);
 
        while (n > 0 && uio->uio_resid) {

Modified: stable/8/sys/kern/kern_sysctl.c
==============================================================================
--- stable/8/sys/kern/kern_sysctl.c     Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/kern/kern_sysctl.c     Thu Mar 29 06:01:11 2012        (r233649)
@@ -1251,8 +1251,8 @@ kernel_sysctlbyname(struct thread *td, c
 static int
 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
 {
-       int error = 0;
        size_t i, len, origidx;
+       int error;
 
        origidx = req->oldidx;
        req->oldidx += l;
@@ -1273,10 +1273,14 @@ sysctl_old_user(struct sysctl_req *req, 
        else {
                if (i > len - origidx)
                        i = len - origidx;
-               error = copyout(p, (char *)req->oldptr + origidx, i);
+               if (req->lock == REQ_WIRED) {
+                       error = copyout_nofault(p, (char *)req->oldptr +
+                           origidx, i);
+               } else
+                       error = copyout(p, (char *)req->oldptr + origidx, i);
+               if (error != 0)
+                       return (error);
        }
-       if (error)
-               return (error);
        if (i < l)
                return (ENOMEM);
        return (0);

Modified: stable/8/sys/sys/proc.h
==============================================================================
--- stable/8/sys/sys/proc.h     Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/sys/proc.h     Thu Mar 29 06:01:11 2012        (r233649)
@@ -409,6 +409,7 @@ do {                                                                        \
 #define        TDP_CALLCHAIN   0x00400000 /* Capture thread's callchain */
 #define        TDP_IGNSUSP     0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
 #define        TDP_AUDITREC    0x01000000 /* Audit record pending on thread */
+#define        TDP_RESETSPUR   0x04000000 /* Reset spurious page fault history. */
 
 /*
  * Reasons that the current thread can not be run yet.

Modified: stable/8/sys/vm/vm_fault.c
==============================================================================
--- stable/8/sys/vm/vm_fault.c  Thu Mar 29 05:02:12 2012        (r233648)
+++ stable/8/sys/vm/vm_fault.c  Thu Mar 29 06:01:11 2012        (r233649)
@@ -1409,11 +1409,17 @@ vm_fault_additional_pages(m, rbehind, ra
        return i;
 }
 
+/*
+ * Block entry into the machine-independent layer's page fault handler by
+ * the calling thread.  Subsequent calls to vm_fault() by that thread will
+ * return KERN_PROTECTION_FAILURE.  Enable machine-dependent handling of
+ * spurious page faults. 
+ */
 int
 vm_fault_disable_pagefaults(void)
 {
 
-       return (curthread_pflags_set(TDP_NOFAULTING));
+       return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR));
 }
 
 void
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to