Author: kib
Date: Fri Mar 21 14:25:09 2014
New Revision: 263475
URL: http://svnweb.freebsd.org/changeset/base/263475

Log:
  Fix two issues with /dev/mem access on amd64, both causing kernel page
  faults.
  
  First, for accesses to direct map region should check for the limit by
  which direct map is instantiated.
  
  Second, for accesses to the kernel map, success returned from the
  kernacc(9) does not guarantee that consequent attempt to read or write
  to the checked address succeed, since other thread might invalidate
  the address meantime.  Add a new thread private flag TDP_DEVMEMIO,
  which instructs vm_fault() to return error when fault happens on the
  MAP_ENTRY_NOFAULT entry, instead of panicing.  The trap handler would
  then see a page fault from access, and recover in normal way, making
  /dev/mem access safer.
  
  Remove GIANT_REQUIRED from the amd64 memrw(), since it is not needed
  and having Giant locked does not solve issues for amd64.
  
  Note that at least the second issue exists on other architectures, and
  requires similar patching for md code.
  
  Reported and tested by:       clusteradm (gjb, sbruno)
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 week

Modified:
  head/sys/amd64/amd64/mem.c
  head/sys/amd64/amd64/trap.c
  head/sys/kern/subr_trap.c
  head/sys/sys/proc.h
  head/sys/vm/vm_fault.c

Modified: head/sys/amd64/amd64/mem.c
==============================================================================
--- head/sys/amd64/amd64/mem.c  Fri Mar 21 14:19:47 2014        (r263474)
+++ head/sys/amd64/amd64/mem.c  Fri Mar 21 14:25:09 2014        (r263475)
@@ -76,14 +76,16 @@ MALLOC_DEFINE(M_MEMDESC, "memdesc", "mem
 int
 memrw(struct cdev *dev, struct uio *uio, int flags)
 {
-       int o;
-       u_long c = 0, v;
        struct iovec *iov;
-       int error = 0;
+       u_long c, v;
+       int error, o, sflags;
        vm_offset_t addr, eaddr;
 
        GIANT_REQUIRED;
 
+       error = 0;
+       c = 0;
+       sflags = curthread_pflags_set(TDP_DEVMEMIO);
        while (uio->uio_resid > 0 && error == 0) {
                iov = uio->uio_iov;
                if (iov->iov_len == 0) {
@@ -98,7 +100,15 @@ memrw(struct cdev *dev, struct uio *uio,
 kmemphys:
                        o = v & PAGE_MASK;
                        c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o));
-                       error = uiomove((void *)PHYS_TO_DMAP(v), (int)c, uio);
+                       v = PHYS_TO_DMAP(v);
+                       if (v < DMAP_MIN_ADDRESS ||
+                           (v > DMAP_MIN_ADDRESS + dmaplimit &&
+                           v <= DMAP_MAX_ADDRESS) ||
+                           pmap_kextract(v) == 0) {
+                               error = EFAULT;
+                               goto ret;
+                       }
+                       error = uiomove((void *)v, (int)c, uio);
                        continue;
                }
                else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
@@ -119,22 +129,30 @@ kmemphys:
                        addr = trunc_page(v);
                        eaddr = round_page(v + c);
 
-                       if (addr < VM_MIN_KERNEL_ADDRESS)
-                               return (EFAULT);
-                       for (; addr < eaddr; addr += PAGE_SIZE) 
-                               if (pmap_extract(kernel_pmap, addr) == 0)
-                                       return (EFAULT);
-
+                       if (addr < VM_MIN_KERNEL_ADDRESS) {
+                               error = EFAULT;
+                               goto ret;
+                       }
+                       for (; addr < eaddr; addr += PAGE_SIZE) {
+                               if (pmap_extract(kernel_pmap, addr) == 0) {
+                                       error = EFAULT;
+                                       goto ret;
+                               }
+                       }
                        if (!kernacc((caddr_t)(long)v, c,
                            uio->uio_rw == UIO_READ ? 
-                           VM_PROT_READ : VM_PROT_WRITE))
-                               return (EFAULT);
+                           VM_PROT_READ : VM_PROT_WRITE)) {
+                               error = EFAULT;
+                               goto ret;
+                       }
 
                        error = uiomove((caddr_t)(long)v, (int)c, uio);
                        continue;
                }
                /* else panic! */
        }
+ret:
+       curthread_pflags_restore(sflags);
        return (error);
 }
 

Modified: head/sys/amd64/amd64/trap.c
==============================================================================
--- head/sys/amd64/amd64/trap.c Fri Mar 21 14:19:47 2014        (r263474)
+++ head/sys/amd64/amd64/trap.c Fri Mar 21 14:25:09 2014        (r263475)
@@ -789,6 +789,12 @@ nogo:
                        frame->tf_rip = (long)curpcb->pcb_onfault;
                        return (0);
                }
+               if ((td->td_pflags & TDP_DEVMEMIO) != 0) {
+                       KASSERT(curpcb->pcb_onfault != NULL,
+                           ("/dev/mem without pcb_onfault"));
+                       frame->tf_rip = (long)curpcb->pcb_onfault;
+                       return (0);
+               }
                trap_fatal(frame, eva);
                return (-1);
        }

Modified: head/sys/kern/subr_trap.c
==============================================================================
--- head/sys/kern/subr_trap.c   Fri Mar 21 14:19:47 2014        (r263474)
+++ head/sys/kern/subr_trap.c   Fri Mar 21 14:25:09 2014        (r263475)
@@ -157,6 +157,8 @@ userret(struct thread *td, struct trapfr
            td->td_rw_rlocks));
        KASSERT((td->td_pflags & TDP_NOFAULTING) == 0,
            ("userret: Returning with pagefaults disabled"));
+       KASSERT((td->td_pflags & TDP_DEVMEMIO) == 0,
+           ("userret: Returning with /dev/mem i/o leaked"));
        KASSERT(td->td_no_sleeping == 0,
            ("userret: Returning with sleep disabled"));
        KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0,

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Fri Mar 21 14:19:47 2014        (r263474)
+++ head/sys/sys/proc.h Fri Mar 21 14:25:09 2014        (r263475)
@@ -428,6 +428,7 @@ do {                                                        
                \
 #define        TDP_RESETSPUR   0x04000000 /* Reset spurious page fault 
history. */
 #define        TDP_NERRNO      0x08000000 /* Last errno is already in td_errno 
*/
 #define        TDP_UIOHELD     0x10000000 /* Current uio has pages held in 
td_ma */
+#define        TDP_DEVMEMIO    0x20000000 /* Accessing memory for /dev/mem */
 
 /*
  * Reasons that the current thread can not be run yet.

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Fri Mar 21 14:19:47 2014        (r263474)
+++ head/sys/vm/vm_fault.c      Fri Mar 21 14:25:09 2014        (r263475)
@@ -269,6 +269,10 @@ RetryFault:;
        map_generation = fs.map->timestamp;
 
        if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
+               if ((curthread->td_pflags & TDP_DEVMEMIO) != 0) {
+                       vm_map_unlock_read(fs.map);
+                       return (KERN_FAILURE);
+               }
                panic("vm_fault: fault on nofault entry, addr: %lx",
                    (u_long)vaddr);
        }
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to