Module Name:    src
Committed By:   uebayasi
Date:           Sun Nov 21 12:43:00 UTC 2010

Modified Files:
        src/sys/miscfs/genfs [uebayasi-xip]: genfs_io.c
        src/sys/uvm [uebayasi-xip]: uvm_bio.c uvm_fault.c uvm_pager.h

Log Message:
Resurrect PGO_ZERO support.

When the vnode pager encounters hole pages in an XIP'ed vnode, it
fills the corresponding page slots with PGO_ZERO and returns them to
the caller (the fault handler).  Fault handlers are responsible for
checking the page slots and redirecting PGO_ZERO to the single "zero
page" allocated by calling uvm_page_zeropage_alloc(9).
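
Schematically, the fault handler side looks like this (a minimal
sketch, not the committed code; "enter_prot", "va" and "pmap" stand
in for the handler's actual locals, and the real changes are in the
uvm_fault.c and uvm_bio.c diffs below):

	if (pg == PGO_ZERO) {
		/*
		 * PGO_ZERO is a magic pointer, not a real struct
		 * vm_page; catch it before touching any page flags,
		 * and redirect it to the single shared, wired,
		 * PG_RDONLY zero page.
		 */
		pg = uvm_page_zeropage_alloc();
		KASSERT(pg != NULL);
		KASSERT((pg->flags & PG_RDONLY) != 0);
		/* never enter a writable mapping of the zero page */
		enter_prot &= ~VM_PROT_WRITE;
	}
	error = pmap_enter(pmap, va, VM_PAGE_TO_PHYS(pg),
	    enter_prot, PMAP_CANFAIL);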

The zero page is a wired, read-only (PG_RDONLY) page.  It's shared
by multiple vnodes and has no single owner.

XIP'ed vnodes are supposed to be "stable" during I/O (unlocked),
because XIP'ed mounts are always read-only: there is no chance for
the mappings of XIP'ed vnodes and their XIP'ed pages to change.
Thus the cached uobj is reused after pgo_get() for PGO_ZERO.
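
For example, in the ubc_fault() path this means the per-page owner
lookup via pg->uobject can't be done for PGO_ZERO slots; the uobj
cached before calling pgo_get() is reused instead (again just a
sketch, simplified from the uvm_bio.c diff below):

	ouobj = uobj = umap->uobj;	/* cache the owner before pgo_get() */
	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[i];

		if (pg == NULL || pg == PGO_DONTCARE)
			continue;
		if (pg != PGO_ZERO)
			uobj = pg->uobject;	/* a real page names its owner */
		else
			uobj = ouobj;		/* a hole page has no owner */
		/* ... lock uobj and enter the mapping as usual ... */
	}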

(Do we need a new concept of "read-only UVM object"?)


To generate a diff of this commit:
cvs rdiff -u -r1.36.2.59 -r1.36.2.60 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.68.2.12 -r1.68.2.13 src/sys/uvm/uvm_bio.c
cvs rdiff -u -r1.166.2.23 -r1.166.2.24 src/sys/uvm/uvm_fault.c
cvs rdiff -u -r1.38.14.1 -r1.38.14.2 src/sys/uvm/uvm_pager.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/miscfs/genfs/genfs_io.c
diff -u src/sys/miscfs/genfs/genfs_io.c:1.36.2.59 src/sys/miscfs/genfs/genfs_io.c:1.36.2.60
--- src/sys/miscfs/genfs/genfs_io.c:1.36.2.59	Sun Nov 21 07:41:49 2010
+++ src/sys/miscfs/genfs/genfs_io.c	Sun Nov 21 12:42:59 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: genfs_io.c,v 1.36.2.59 2010/11/21 07:41:49 uebayasi Exp $	*/
+/*	$NetBSD: genfs_io.c,v 1.36.2.60 2010/11/21 12:42:59 uebayasi Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.36.2.59 2010/11/21 07:41:49 uebayasi Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.36.2.60 2010/11/21 12:42:59 uebayasi Exp $");
 
 #include "opt_xip.h"
 
@@ -652,13 +652,11 @@
 		 */
 
 		if (blkno == (daddr_t)-1) {
-		    if (!xip) {
 			int holepages = (round_page(offset + iobytes) -
 			    trunc_page(offset)) >> PAGE_SHIFT;
 			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
 
-			KASSERT(!xip);
-
+		    if (!xip) {
 			sawhole = true;
 			memset((char *)kva + (offset - startoffset), 0,
 			    iobytes);
@@ -673,7 +671,11 @@
 				}
 			}
 		    } else {
-			panic("XIP hole page is not supported yet");
+			for (i = 0; i < holepages; i++) {
+				pgs[ridx + pidx + i] = PGO_ZERO;
+			}
+			UVMHIST_LOG(ubchist, "xip HOLE pgs %d .. %d",
+			    pidx, pidx + holepages - 1, 0, 0);
 		    }
 			continue;
 		}
@@ -899,6 +901,7 @@
 	for (i = ridx; i < ridx + npages; i++) {
 		struct vm_page *pg = pgs[i];
 
+	    if (pg != PGO_ZERO) {
 		KASSERT(pg != NULL);
 		KASSERT((pg->flags & PG_RDONLY) != 0);
 		KASSERT((pg->flags & PG_BUSY) != 0);
@@ -908,10 +911,18 @@
 
 		/*
 		 * XXXUEBS
-		 * Actually this is not necessary, because device pages are
-		 * "stateless", and they have no owner.
+		 * Actually this is not necessary, because device
+		 * pages are "stateless", and they have no owner.
 		 */
 		pg->uobject = &vp->v_uobj;
+	    } else {
+		/*
+		 * XIP hole pages are passed as a magic pointer
+		 * back to fault handlers.  Fault handlers are
+		 * responsible for checking it and redirecting the VA to
+		 * a single "zero page".
+		 */
+	    }
 	}
     } /* xip */
 	mutex_exit(&uobj->vmobjlock);

Index: src/sys/uvm/uvm_bio.c
diff -u src/sys/uvm/uvm_bio.c:1.68.2.12 src/sys/uvm/uvm_bio.c:1.68.2.13
--- src/sys/uvm/uvm_bio.c:1.68.2.12	Thu Nov  4 08:47:38 2010
+++ src/sys/uvm/uvm_bio.c	Sun Nov 21 12:42:59 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_bio.c,v 1.68.2.12 2010/11/04 08:47:38 uebayasi Exp $	*/
+/*	$NetBSD: uvm_bio.c,v 1.68.2.13 2010/11/21 12:42:59 uebayasi Exp $	*/
 
 /*
  * Copyright (c) 1998 Chuck Silvers.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.68.2.12 2010/11/04 08:47:38 uebayasi Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.68.2.13 2010/11/21 12:42:59 uebayasi Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_ubc.h"
@@ -224,16 +224,20 @@
 
 static inline int
 ubc_fault_page(const struct uvm_faultinfo *ufi, const struct ubc_map *umap,
-    struct vm_page *pg, vm_prot_t prot, vm_prot_t access_type, vaddr_t va)
+    struct uvm_object *uobj, struct vm_page *pg, vm_prot_t prot,
+    vm_prot_t access_type, vaddr_t va)
 {
-	struct uvm_object *uobj;
 	vm_prot_t mask;
 	int error;
 	bool rdonly;
 
-	uobj = pg->uobject;
+	UVMHIST_FUNC("ubc_fault_page"); UVMHIST_CALLED(ubchist);
+
+	KASSERT(pg != NULL);
+	KASSERT(pg == PGO_ZERO || uobj == pg->uobject);
 	KASSERT(mutex_owned(&uobj->vmobjlock));
 
+    if (__predict_true(pg != PGO_ZERO)) {
 	if (pg->flags & PG_WANTED) {
 		wakeup(pg);
 	}
@@ -264,17 +268,29 @@
 			pg = newpg;
 		}
 	}
+    }
 
 	/*
 	 * Note that a page whose backing store is partially allocated
 	 * is marked as PG_RDONLY.
 	 */
 
-	KASSERT((pg->flags & PG_RDONLY) == 0 ||
+	KASSERT(pg == PGO_ZERO ||
+	    (pg->flags & PG_RDONLY) == 0 ||
 	    (access_type & VM_PROT_WRITE) == 0 ||
 	    pg->offset < umap->writeoff ||
 	    pg->offset + PAGE_SIZE > umap->writeoff + umap->writelen);
 
+	if (__predict_false(pg == PGO_ZERO)) {
+		UVMHIST_LOG(ubchist, "replacing PGO_ZERO with zeropage",0,0,0,0);
+		pg = uvm_page_zeropage_alloc();
+		UVMHIST_LOG(ubchist,
+		    "PGO_ZERO replaced with pg %p (phys_addr=0x%lx)",
+		    pg, VM_PAGE_TO_PHYS(pg), 0, 0);
+		KASSERT(pg != NULL);
+		KASSERT((pg->flags & PG_RDONLY) != 0);
+	}
+
 	rdonly = ((access_type & VM_PROT_WRITE) == 0 &&
 	    (pg->flags & PG_RDONLY) != 0) ||
 	    UVM_OBJ_NEEDS_WRITEFAULT(uobj);
@@ -283,6 +299,7 @@
 	error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
 	    prot & mask, PMAP_CANFAIL | (access_type & mask));
 
+    if (__predict_true(pg != uvm_page_zeropage)) {
 	if (__predict_true((pg->flags & PG_DEVICE) == 0)) {
 		mutex_enter(&uvm_pageqlock);
 		uvm_pageactivate(pg);
@@ -290,6 +307,7 @@
 	}
 	pg->flags &= ~(PG_BUSY|PG_WANTED);
 	UVM_PAGE_OWN(pg, NULL);
+    }
 
 	return error;
 }
@@ -302,7 +320,7 @@
 ubc_fault(struct uvm_faultinfo *ufi, vaddr_t ign1, struct vm_page **ign2,
     int ign3, int ign4, vm_prot_t access_type, int flags)
 {
-	struct uvm_object *uobj;
+	struct uvm_object *uobj, *ouobj;
 	struct ubc_map *umap;
 	vaddr_t va, eva, ubc_offset, slot_offset;
 	struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT];
@@ -353,7 +371,7 @@
 #endif
 
 	/* no umap locking needed since we have a ref on the umap */
-	uobj = umap->uobj;
+	ouobj = uobj = umap->uobj;
 
 	if ((access_type & VM_PROT_WRITE) == 0) {
 		npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT;
@@ -422,6 +440,7 @@
 		if (pg == NULL || pg == PGO_DONTCARE) {
 			continue;
 		}
+	    if (__predict_true(pg != PGO_ZERO)) {
 		if (__predict_false(pg->uobject != uobj)) {
 			/* Check for the first iteration and error cases. */
 			if (uobj != NULL) {
@@ -432,7 +451,14 @@
 			uobj = pg->uobject;
 			mutex_enter(&uobj->vmobjlock);
 		}
-		error = ubc_fault_page(ufi, umap, pg, prot, access_type, va);
+	    } else {
+		if (__predict_false(uobj != ouobj)) {
+			uobj = ouobj;
+			mutex_enter(&uobj->vmobjlock);
+		}
+	    }
+		KASSERT(pg == PGO_ZERO || uobj == pg->uobject);
+		error = ubc_fault_page(ufi, umap, uobj, pg, prot, access_type, va);
 		if (error) {
 			/*
 			 * Flush (there might be pages entered), drop the lock,

Index: src/sys/uvm/uvm_fault.c
diff -u src/sys/uvm/uvm_fault.c:1.166.2.23 src/sys/uvm/uvm_fault.c:1.166.2.24
--- src/sys/uvm/uvm_fault.c:1.166.2.23	Fri Nov 19 01:44:47 2010
+++ src/sys/uvm/uvm_fault.c	Sun Nov 21 12:42:59 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_fault.c,v 1.166.2.23 2010/11/19 01:44:47 uebayasi Exp $	*/
+/*	$NetBSD: uvm_fault.c,v 1.166.2.24 2010/11/21 12:42:59 uebayasi Exp $	*/
 
 /*
  *
@@ -39,7 +39,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.166.2.23 2010/11/19 01:44:47 uebayasi Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.166.2.24 2010/11/21 12:42:59 uebayasi Exp $");
 
 #include "opt_uvmhist.h"
 #include "opt_xip.h"
@@ -564,7 +564,7 @@
 		opg = oanon->an_page;
 		KASSERT(opg != NULL);
 		KASSERT(opg->uobject == NULL || opg->loan_count > 0);
-	} else if (uobjpage != PGO_DONTCARE) {
+	} else if (uobjpage != PGO_DONTCARE && uobjpage != PGO_ZERO) {
 		/* object-backed COW */
 		opg = uobjpage;
 	} else {
@@ -579,7 +579,7 @@
 
 	KASSERT(amap != NULL);
 	KASSERT(uobjpage != NULL);
-	KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0);
+	KASSERT(uobjpage == PGO_DONTCARE || uobjpage == PGO_ZERO || (uobjpage->flags & PG_BUSY) != 0);
 	KASSERT(mutex_owned(&amap->am_l));
 	KASSERT(oanon == NULL || mutex_owned(&oanon->an_lock));
 	KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock));
@@ -1654,7 +1654,7 @@
 	 */
 	KASSERT(amap == NULL || mutex_owned(&amap->am_l));
 	KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock));
-	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	KASSERT(uobj == NULL || uobjpage == PGO_ZERO || (uobjpage->flags & PG_BUSY) != 0);
 
 	/*
 	 * notes:
@@ -1664,8 +1664,8 @@
 	 *  - at this point uobjpage could be PG_WANTED (handle later)
 	 */
 
-	KASSERT(uobj == NULL || uobj == uobjpage->uobject);
-	KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) ||
+	KASSERT(uobj == NULL || uobjpage == PGO_ZERO || (uobjpage->flags & PG_DEVICE) != 0 || uobj == uobjpage->uobject);
+	KASSERT(uobj == NULL || uobjpage == PGO_ZERO || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) ||
 	    (uobjpage->flags & PG_CLEAN) != 0);
 
 	if (flt->promote == false) {
@@ -1865,13 +1865,17 @@
 
 	/* locked: pg */
 
+	KASSERT(pg != NULL);
+
+    if (pg != PGO_ZERO) {
 	KASSERT((pg->flags & PG_BUSY) != 0);
 
-	if ((pg->flags & PG_DEVICE) == 0) {
+	if (pg != PGO_ZERO && (pg->flags & PG_DEVICE) == 0) {
 		mutex_enter(&uvm_pageqlock);
 		uvm_pageactivate(pg);
 		mutex_exit(&uvm_pageqlock);
 	}
+    }
 
 	/*
 	 * re-verify the state of the world by first trying to relock
@@ -1882,8 +1886,12 @@
 	if (locked && amap)
 		amap_lock(amap);
 
+    if (pg != PGO_ZERO) {
 	/* might be changed */
 	uobj = pg->uobject;
+    } else {
+	/* XIP hole page is shared, and it has no single owner */
+    }
 
 	mutex_enter(&uobj->vmobjlock);
 
@@ -1896,7 +1904,7 @@
 	 * we unlock and clean up.
 	 */
 
-	if ((pg->flags & PG_RELEASED) != 0 ||
+	if ((pg != PGO_ZERO && (pg->flags & PG_RELEASED) != 0) ||
 	    (locked && amap && amap_lookup(&ufi->entry->aref,
 	      ufi->orig_rvaddr - ufi->entry->start))) {
 		if (locked)
@@ -1912,6 +1920,7 @@
 		UVMHIST_LOG(maphist,
 		    "  wasn't able to relock after fault: retry",
 		    0,0,0,0);
+	    if (pg != PGO_ZERO) {
 		if (pg->flags & PG_WANTED) {
 			wakeup(pg);
 		}
@@ -1923,6 +1932,7 @@
 		}
 		pg->flags &= ~(PG_BUSY|PG_WANTED);
 		UVM_PAGE_OWN(pg, NULL);
+	    }
 		mutex_exit(&uobj->vmobjlock);
 		return ERESTART;
 	}
@@ -1966,6 +1976,7 @@
 
 	uvmexp.flt_obj++;
 	if (UVM_ET_ISCOPYONWRITE(ufi->entry) ||
+	    uobjpage == PGO_ZERO ||
 	    UVM_OBJ_NEEDS_WRITEFAULT(uobjpage->uobject))
 		flt->enter_prot &= ~VM_PROT_WRITE;
 	pg = uobjpage;		/* map in the actual object */
@@ -1977,7 +1988,7 @@
 	 * about writing to loaned pages...
 	 */
 
-	if (uobjpage->loan_count) {
+	if (pg != PGO_ZERO && uobjpage->loan_count) {
 		uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage);
 	}
 	KASSERT(pg == uobjpage);
@@ -2079,7 +2090,7 @@
 	 * fill in the data
 	 */
 
-	if (uobjpage != PGO_DONTCARE) {
+	if (uobjpage != PGO_DONTCARE && uobjpage != PGO_ZERO) {
 		uvmexp.flt_prcopy++;
 
 		/*
@@ -2150,9 +2161,21 @@
 	 */
 	KASSERT(amap == NULL || mutex_owned(&amap->am_l));
 	KASSERT(uobj == NULL || mutex_owned(&uobj->vmobjlock));
-	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
+	KASSERT(uobj == NULL || uobjpage == PGO_ZERO || (uobjpage->flags & PG_BUSY) != 0);
 	KASSERT(anon == NULL || mutex_owned(&anon->an_lock));
-	KASSERT((pg->flags & PG_BUSY) != 0);
+	KASSERT(pg == PGO_ZERO || (pg->flags & PG_BUSY) != 0);
+
+#ifdef XIP
+	if (pg == PGO_ZERO) {
+		UVMHIST_LOG(maphist, "replacing PGO_ZERO with zeropage",0,0,0,0);
+		pg = uvm_page_zeropage_alloc();
+		UVMHIST_LOG(maphist,
+		    "PGO_ZERO replaced with pg %p (phys_addr=0x%lx)",
+		    pg, VM_PAGE_TO_PHYS(pg), 0, 0);
+		KASSERT(pg != NULL);
+		KASSERT((pg->flags & PG_RDONLY) != 0);
+	}
+#endif
 
 	/*
 	 * all resources are present.   we can now map it in and free our
@@ -2176,6 +2199,7 @@
 		 * as the map may change while we're asleep.
 		 */
 
+	    if (pg != uvm_page_zeropage) {
 		if (pg->flags & PG_WANTED)
 			wakeup(pg);
 
@@ -2187,6 +2211,7 @@
 
 		pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
 		UVM_PAGE_OWN(pg, NULL);
+	    }
 
 		uvmfault_unlockall(ufi, amap, uobj, anon);
 		if (!uvm_reclaimable()) {
@@ -2201,11 +2226,13 @@
 		return ERESTART;
 	}
 
+    if (pg != uvm_page_zeropage) {
 	if (__predict_true((pg->flags & PG_DEVICE) == 0))
 		uvm_fault_lower_done(ufi, flt, uobj, anon, pg);
 
 	pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
 	UVM_PAGE_OWN(pg, NULL);
+    }
 
 	pmap_update(ufi->orig_map->pmap);
 	uvmfault_unlockall(ufi, amap, uobj, anon);
@@ -2227,6 +2254,8 @@
 
 	UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist);
 
+	KASSERT(pg != uvm_page_zeropage);
+
 	mutex_enter(&uvm_pageqlock);
 	if (flt->wire_paging) {
 		uvm_pagewire(pg);

Index: src/sys/uvm/uvm_pager.h
diff -u src/sys/uvm/uvm_pager.h:1.38.14.1 src/sys/uvm/uvm_pager.h:1.38.14.2
--- src/sys/uvm/uvm_pager.h:1.38.14.1	Fri Oct 22 07:22:57 2010
+++ src/sys/uvm/uvm_pager.h	Sun Nov 21 12:42:59 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: uvm_pager.h,v 1.38.14.1 2010/10/22 07:22:57 uebayasi Exp $	*/
+/*	$NetBSD: uvm_pager.h,v 1.38.14.2 2010/11/21 12:42:59 uebayasi Exp $	*/
 
 /*
  *
@@ -168,6 +168,8 @@
 
 /* page we are not interested in getting */
 #define PGO_DONTCARE ((struct vm_page *) -1L)	/* [get only] */
+/* page redirected to zero page (XIP hole page) */
+#define PGO_ZERO ((struct vm_page *) -2L)	/* [get only] */
 
 #ifdef _KERNEL
 
