Module Name:    src
Committed By:   chs
Date:           Tue Sep 20 14:01:33 UTC 2011

Modified Files:
        src/sys/ufs/ffs: ffs_alloc.c
        src/sys/ufs/lfs: lfs_vnops.c
        src/sys/ufs/ufs: ufs_inode.c

Log Message:
Strengthen the assertions about pages existing during block allocation,
which were incorrectly relaxed last year.  Add comments so that
the intent of these assertions is clearer.

In ufs_balloc_range(), don't free pages or mark them dirty if
allocating their backing store failed.  This fixes PR 45369.
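
To make the invariant explicit before diving into the diffs: under
UVM_PAGE_TRKOWN, the strengthened check in ffs_alloc() accepts a missing
page only when the ffs_write() fast path could legitimately have skipped
creating it.  A hypothetical predicate expressing this (the helper does
not exist in the tree; every name in it comes from the ffs_alloc.c hunk
below) might read:

/*
 * Hypothetical predicate distilled from the new KASSERT in
 * ffs_alloc(); not part of the committed code.
 */
static inline bool
alloc_page_ok(struct vnode *vp, struct vm_page *pg,
    struct fs *fs, int size)
{
	if (pg == NULL) {
		/*
		 * No page: allowed only on the ffs_write() fast path,
		 * i.e. the file is not mapped into any user address
		 * space and the write covers whole pages and whole
		 * blocks, so no stale data can be observed.
		 */
		return (vp->v_vflag & VV_MAPPED) == 0 &&
		    (size & PAGE_MASK) == 0 &&
		    blkoff(fs, size) == 0;
	}
	/* Page exists: it must be busied by this proc and lwp. */
	return pg->owner == curproc->p_pid &&
	    pg->lowner == curlwp->l_lid;
}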


To generate a diff of this commit:
cvs rdiff -u -r1.128 -r1.129 src/sys/ufs/ffs/ffs_alloc.c
cvs rdiff -u -r1.237 -r1.238 src/sys/ufs/lfs/lfs_vnops.c
cvs rdiff -u -r1.87 -r1.88 src/sys/ufs/ufs/ufs_inode.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/ufs/ffs/ffs_alloc.c
diff -u src/sys/ufs/ffs/ffs_alloc.c:1.128 src/sys/ufs/ffs/ffs_alloc.c:1.129
--- src/sys/ufs/ffs/ffs_alloc.c:1.128	Sun Jun 12 03:36:00 2011
+++ src/sys/ufs/ffs/ffs_alloc.c	Tue Sep 20 14:01:32 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_alloc.c,v 1.128 2011/06/12 03:36:00 rmind Exp $	*/
+/*	$NetBSD: ffs_alloc.c,v 1.129 2011/09/20 14:01:32 chs Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -70,11 +70,12 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.128 2011/06/12 03:36:00 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.129 2011/09/20 14:01:32 chs Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
 #include "opt_quota.h"
+#include "opt_uvm_page_trkown.h"
 #endif
 
 #include <sys/param.h>
@@ -100,6 +101,10 @@
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
+#ifdef UVM_PAGE_TRKOWN
+#include <uvm/uvm.h>
+#endif
+
 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
 static ino_t ffs_dirpref(struct inode *);
@@ -184,17 +189,36 @@
 	KASSERT(mutex_owned(&ump->um_lock));
 
 #ifdef UVM_PAGE_TRKOWN
+
+	/*
+	 * Sanity-check that allocations within the file size
+	 * do not allow other threads to read the stale contents
+	 * of newly allocated blocks.
+	 * Usually pages will exist to cover the new allocation.
+	 * There is an optimization in ffs_write() where we skip
+	 * creating pages if several conditions are met:
+	 *  - the file must not be mapped (in any user address space).
+	 *  - the write must cover whole pages and whole blocks.
+	 * If those conditions are not met then pages must exist and
+	 * be locked by the current thread.
+	 */
+
 	if (ITOV(ip)->v_type == VREG &&
 	    lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
 		struct vm_page *pg;
-		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
+		struct vnode *vp = ITOV(ip);
+		struct uvm_object *uobj = &vp->v_uobj;
 		voff_t off = trunc_page(lblktosize(fs, lbn));
 		voff_t endoff = round_page(lblktosize(fs, lbn) + size);
 
 		mutex_enter(uobj->vmobjlock);
 		while (off < endoff) {
 			pg = uvm_pagelookup(uobj, off);
-			KASSERT(pg == NULL || pg->owner == curproc->p_pid);
+			KASSERT((pg == NULL && (vp->v_vflag & VV_MAPPED) == 0 &&
+				 (size & PAGE_MASK) == 0 && 
+				 blkoff(fs, size) == 0) ||
+				(pg != NULL && pg->owner == curproc->p_pid &&
+				 pg->lowner == curlwp->l_lid));
 			off += PAGE_SIZE;
 		}
 		mutex_exit(uobj->vmobjlock);
@@ -292,6 +316,18 @@
 	KASSERT(mutex_owned(&ump->um_lock));
 
 #ifdef UVM_PAGE_TRKOWN
+
+	/*
+	 * Sanity-check that allocations within the file size
+	 * do not allow other threads to read the stale contents
+	 * of newly allocated blocks.
+	 * Unlike in ffs_alloc(), here pages must always exist
+	 * for such allocations, because only the last block of a file
+	 * can be a fragment and ffs_write() will reallocate the
+	 * fragment to the new size using ufs_balloc_range(),
+	 * which always creates pages to cover blocks it allocates.
+	 */
+
 	if (ITOV(ip)->v_type == VREG) {
 		struct vm_page *pg;
 		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
@@ -301,8 +337,8 @@
 		mutex_enter(uobj->vmobjlock);
 		while (off < endoff) {
 			pg = uvm_pagelookup(uobj, off);
-			KASSERT(pg == NULL || pg->owner == curproc->p_pid);
-			KASSERT((pg->flags & PG_CLEAN) == 0);
+			KASSERT(pg->owner == curproc->p_pid &&
+				pg->lowner == curlwp->l_lid);
 			off += PAGE_SIZE;
 		}
 		mutex_exit(uobj->vmobjlock);

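The second hunk above drops the pg == NULL arm entirely: ffs_realloccg()
only resizes the last, fragment-bearing block of a file, and
ufs_balloc_range() always creates pages before calling it.  Spelled out
(the explicit NULL check is added here for clarity only; the committed
KASSERT dereferences pg directly):

	pg = uvm_pagelookup(uobj, off);
	KASSERT(pg != NULL);	/* implicit in the committed KASSERT */
	KASSERT(pg->owner == curproc->p_pid &&
		pg->lowner == curlwp->l_lid);
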
Index: src/sys/ufs/lfs/lfs_vnops.c
diff -u src/sys/ufs/lfs/lfs_vnops.c:1.237 src/sys/ufs/lfs/lfs_vnops.c:1.238
--- src/sys/ufs/lfs/lfs_vnops.c:1.237	Tue Jul 12 16:59:48 2011
+++ src/sys/ufs/lfs/lfs_vnops.c	Tue Sep 20 14:01:33 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_vnops.c,v 1.237 2011/07/12 16:59:48 dholland Exp $	*/
+/*	$NetBSD: lfs_vnops.c,v 1.238 2011/09/20 14:01:33 chs Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,10 +60,11 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.237 2011/07/12 16:59:48 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.238 2011/09/20 14:01:33 chs Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
+#include "opt_uvm_page_trkown.h"
 #endif
 
 #include <sys/param.h>

Index: src/sys/ufs/ufs/ufs_inode.c
diff -u src/sys/ufs/ufs/ufs_inode.c:1.87 src/sys/ufs/ufs/ufs_inode.c:1.88
--- src/sys/ufs/ufs/ufs_inode.c:1.87	Sun Jun 12 03:36:02 2011
+++ src/sys/ufs/ufs/ufs_inode.c	Tue Sep 20 14:01:33 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ufs_inode.c,v 1.87 2011/06/12 03:36:02 rmind Exp $	*/
+/*	$NetBSD: ufs_inode.c,v 1.88 2011/09/20 14:01:33 chs Exp $	*/
 
 /*
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.87 2011/06/12 03:36:02 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.88 2011/09/20 14:01:33 chs Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -269,16 +269,6 @@
 	if (error) {
 		goto out;
 	}
-	mutex_enter(uobj->vmobjlock);
-	mutex_enter(&uvm_pageqlock);
-	for (i = 0; i < npages; i++) {
-		UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
-		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
-		pgs[i]->flags &= ~PG_CLEAN;
-		uvm_pageactivate(pgs[i]);
-	}
-	mutex_exit(&uvm_pageqlock);
-	mutex_exit(uobj->vmobjlock);
 
 	/*
 	 * now allocate the range.
@@ -288,27 +278,31 @@
 	genfs_node_unlock(vp);
 
 	/*
-	 * clear PG_RDONLY on any pages we are holding
-	 * (since they now have backing store) and unbusy them.
+	 * if the allocation succeeded, clear PG_CLEAN on all the pages
+	 * and clear PG_RDONLY on any pages that are now fully backed
+	 * by disk blocks.  if the allocation failed, we do not invalidate
+	 * the pages since they might have already existed and been dirty,
+	 * in which case we need to keep them around.  if we created the pages,
+	 * they will be clean and read-only, and leaving such pages
+	 * in the cache won't cause any problems.
 	 */
 
 	GOP_SIZE(vp, off + len, &eob, 0);
 	mutex_enter(uobj->vmobjlock);
+	mutex_enter(&uvm_pageqlock);
 	for (i = 0; i < npages; i++) {
-		if (off <= pagestart + (i << PAGE_SHIFT) &&
-		    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
-			pgs[i]->flags &= ~PG_RDONLY;
-		} else if (error) {
-			pgs[i]->flags |= PG_RELEASED;
+		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
+		if (!error) {
+			if (off <= pagestart + (i << PAGE_SHIFT) &&
+			    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
+				pgs[i]->flags &= ~PG_RDONLY;
+			}
+			pgs[i]->flags &= ~PG_CLEAN;
 		}
+		uvm_pageactivate(pgs[i]);
 	}
-	if (error) {
-		mutex_enter(&uvm_pageqlock);
-		uvm_page_unbusy(pgs, npages);
-		mutex_exit(&uvm_pageqlock);
-	} else {
-		uvm_page_unbusy(pgs, npages);
-	}
+	mutex_exit(&uvm_pageqlock);
+	uvm_page_unbusy(pgs, npages);
 	mutex_exit(uobj->vmobjlock);
 
  out:

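Stripped of diff context, the reworked tail of ufs_balloc_range() looks
as follows (a condensed sketch of the post-patch logic, using the same
locals as the hunk above):

	GOP_SIZE(vp, off + len, &eob, 0);
	mutex_enter(uobj->vmobjlock);
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
		/*
		 * On success, clear PG_CLEAN everywhere and PG_RDONLY
		 * on pages now fully backed by disk blocks.  On failure
		 * the flags are left alone: pre-existing dirty pages
		 * keep their data, and pages created here stay clean
		 * and read-only, which is harmless.
		 */
		if (!error) {
			if (off <= pagestart + (i << PAGE_SHIFT) &&
			    pagestart + ((i + 1) << PAGE_SHIFT) <= eob)
				pgs[i]->flags &= ~PG_RDONLY;
			pgs[i]->flags &= ~PG_CLEAN;
		}
		uvm_pageactivate(pgs[i]);
	}
	mutex_exit(&uvm_pageqlock);
	uvm_page_unbusy(pgs, npages);
	mutex_exit(uobj->vmobjlock);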