Module Name:	src
Committed By:	chs
Date:		Tue Sep 20 14:01:33 UTC 2011
Modified Files:
	src/sys/ufs/ffs: ffs_alloc.c
	src/sys/ufs/lfs: lfs_vnops.c
	src/sys/ufs/ufs: ufs_inode.c

Log Message:
strengthen the assertions about pages existing during block allocation,
which were incorrectly relaxed last year.  add some comments so that the
intent of these is hopefully clearer.  in ufs_balloc_range(), don't free
pages or mark them dirty if allocating their backing store failed.
this fixes PR 45369.


To generate a diff of this commit:
cvs rdiff -u -r1.128 -r1.129 src/sys/ufs/ffs/ffs_alloc.c
cvs rdiff -u -r1.237 -r1.238 src/sys/ufs/lfs/lfs_vnops.c
cvs rdiff -u -r1.87 -r1.88 src/sys/ufs/ufs/ufs_inode.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
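As an illustrative aside, the page-ownership invariant being strengthened
here can be modeled in a few lines of userland C.  This is a sketch only,
with invented type and function names (struct example_page,
alloc_invariant_holds); the authoritative statement is the KASSERTs in
the ffs_alloc.c diff below.  A page may be absent only when the
ffs_write() optimization that skips creating pages applies (file not
mapped anywhere, write covers whole pages and whole blocks); otherwise
the page must exist and be owned by the allocating proc/lwp.

/*
 * Userland model of the strengthened ffs_alloc() assertion.
 * All names here are invented for illustration.
 */
#include <stdbool.h>
#include <stddef.h>

struct example_page {
	int owner;	/* owning pid, as tracked by UVM_PAGE_TRKOWN */
	int lowner;	/* owning lwp id */
};

static bool
alloc_invariant_holds(const struct example_page *pg, bool file_mapped,
    bool whole_pages, bool whole_blocks, int pid, int lid)
{
	if (pg == NULL)
		/* no page is legal only under the no-page optimization */
		return !file_mapped && whole_pages && whole_blocks;
	/* otherwise the page must be busied by the current proc/lwp */
	return pg->owner == pid && pg->lowner == lid;
}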
Modified files:

Index: src/sys/ufs/ffs/ffs_alloc.c
diff -u src/sys/ufs/ffs/ffs_alloc.c:1.128 src/sys/ufs/ffs/ffs_alloc.c:1.129
--- src/sys/ufs/ffs/ffs_alloc.c:1.128	Sun Jun 12 03:36:00 2011
+++ src/sys/ufs/ffs/ffs_alloc.c	Tue Sep 20 14:01:32 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ffs_alloc.c,v 1.128 2011/06/12 03:36:00 rmind Exp $	*/
+/*	$NetBSD: ffs_alloc.c,v 1.129 2011/09/20 14:01:32 chs Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -70,11 +70,12 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.128 2011/06/12 03:36:00 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.129 2011/09/20 14:01:32 chs Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
 #include "opt_quota.h"
+#include "opt_uvm_page_trkown.h"
 #endif
 
 #include <sys/param.h>
@@ -100,6 +101,10 @@
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
+#ifdef UVM_PAGE_TRKOWN
+#include <uvm/uvm.h>
+#endif
+
 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
 static ino_t ffs_dirpref(struct inode *);
@@ -184,17 +189,36 @@
 	KASSERT(mutex_owned(&ump->um_lock));
 
 #ifdef UVM_PAGE_TRKOWN
+
+	/*
+	 * Sanity-check that allocations within the file size
+	 * do not allow other threads to read the stale contents
+	 * of newly allocated blocks.
+	 * Usually pages will exist to cover the new allocation.
+	 * There is an optimization in ffs_write() where we skip
+	 * creating pages if several conditions are met:
+	 *  - the file must not be mapped (in any user address space).
+	 *  - the write must cover whole pages and whole blocks.
+	 * If those conditions are not met then pages must exist and
+	 * be locked by the current thread.
+	 */
+
 	if (ITOV(ip)->v_type == VREG &&
 	    lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
 		struct vm_page *pg;
-		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
+		struct vnode *vp = ITOV(ip);
+		struct uvm_object *uobj = &vp->v_uobj;
 		voff_t off = trunc_page(lblktosize(fs, lbn));
 		voff_t endoff = round_page(lblktosize(fs, lbn) + size);
 
 		mutex_enter(uobj->vmobjlock);
 		while (off < endoff) {
 			pg = uvm_pagelookup(uobj, off);
-			KASSERT(pg == NULL || pg->owner == curproc->p_pid);
+			KASSERT((pg == NULL && (vp->v_vflag & VV_MAPPED) == 0 &&
+				 (size & PAGE_MASK) == 0 &&
+				 blkoff(fs, size) == 0) ||
+				(pg != NULL && pg->owner == curproc->p_pid &&
+				 pg->lowner == curlwp->l_lid));
 			off += PAGE_SIZE;
 		}
 		mutex_exit(uobj->vmobjlock);
@@ -292,6 +316,18 @@
 	KASSERT(mutex_owned(&ump->um_lock));
 
 #ifdef UVM_PAGE_TRKOWN
+
+	/*
+	 * Sanity-check that allocations within the file size
+	 * do not allow other threads to read the stale contents
+	 * of newly allocated blocks.
+	 * Unlike in ffs_alloc(), here pages must always exist
+	 * for such allocations, because only the last block of a file
+	 * can be a fragment and ffs_write() will reallocate the
+	 * fragment to the new size using ufs_balloc_range(),
+	 * which always creates pages to cover blocks it allocates.
+	 */
+
 	if (ITOV(ip)->v_type == VREG) {
 		struct vm_page *pg;
 		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
@@ -301,8 +337,8 @@
 		mutex_enter(uobj->vmobjlock);
 		while (off < endoff) {
 			pg = uvm_pagelookup(uobj, off);
-			KASSERT(pg == NULL || pg->owner == curproc->p_pid);
-			KASSERT((pg->flags & PG_CLEAN) == 0);
+			KASSERT(pg->owner == curproc->p_pid &&
+				pg->lowner == curlwp->l_lid);
 			off += PAGE_SIZE;
 		}
 		mutex_exit(uobj->vmobjlock);

Index: src/sys/ufs/lfs/lfs_vnops.c
diff -u src/sys/ufs/lfs/lfs_vnops.c:1.237 src/sys/ufs/lfs/lfs_vnops.c:1.238
--- src/sys/ufs/lfs/lfs_vnops.c:1.237	Tue Jul 12 16:59:48 2011
+++ src/sys/ufs/lfs/lfs_vnops.c	Tue Sep 20 14:01:33 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: lfs_vnops.c,v 1.237 2011/07/12 16:59:48 dholland Exp $	*/
+/*	$NetBSD: lfs_vnops.c,v 1.238 2011/09/20 14:01:33 chs Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
@@ -60,10 +60,11 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.237 2011/07/12 16:59:48 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.238 2011/09/20 14:01:33 chs Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
+#include "opt_uvm_page_trkown.h"
 #endif
 
 #include <sys/param.h>

Index: src/sys/ufs/ufs/ufs_inode.c
diff -u src/sys/ufs/ufs/ufs_inode.c:1.87 src/sys/ufs/ufs/ufs_inode.c:1.88
--- src/sys/ufs/ufs/ufs_inode.c:1.87	Sun Jun 12 03:36:02 2011
+++ src/sys/ufs/ufs/ufs_inode.c	Tue Sep 20 14:01:33 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: ufs_inode.c,v 1.87 2011/06/12 03:36:02 rmind Exp $	*/
+/*	$NetBSD: ufs_inode.c,v 1.88 2011/09/20 14:01:33 chs Exp $	*/
 
 /*
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.87 2011/06/12 03:36:02 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.88 2011/09/20 14:01:33 chs Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -269,16 +269,6 @@
 	if (error) {
 		goto out;
 	}
-	mutex_enter(uobj->vmobjlock);
-	mutex_enter(&uvm_pageqlock);
-	for (i = 0; i < npages; i++) {
-		UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
-		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
-		pgs[i]->flags &= ~PG_CLEAN;
-		uvm_pageactivate(pgs[i]);
-	}
-	mutex_exit(&uvm_pageqlock);
-	mutex_exit(uobj->vmobjlock);
 
 	/*
 	 * now allocate the range.
@@ -288,27 +278,31 @@
 	genfs_node_unlock(vp);
 
 	/*
-	 * clear PG_RDONLY on any pages we are holding
-	 * (since they now have backing store) and unbusy them.
+	 * if the allocation succeeded, clear PG_CLEAN on all the pages
+	 * and clear PG_RDONLY on any pages that are now fully backed
+	 * by disk blocks.  if the allocation failed, we do not invalidate
+	 * the pages since they might have already existed and been dirty,
+	 * in which case we need to keep them around.  if we created the pages,
+	 * they will be clean and read-only, and leaving such pages
+	 * in the cache won't cause any problems.
 	 */
 
 	GOP_SIZE(vp, off + len, &eob, 0);
 	mutex_enter(uobj->vmobjlock);
+	mutex_enter(&uvm_pageqlock);
 	for (i = 0; i < npages; i++) {
-		if (off <= pagestart + (i << PAGE_SHIFT) &&
-		    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
-			pgs[i]->flags &= ~PG_RDONLY;
-		} else if (error) {
-			pgs[i]->flags |= PG_RELEASED;
+		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
+		if (!error) {
+			if (off <= pagestart + (i << PAGE_SHIFT) &&
+			    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
+				pgs[i]->flags &= ~PG_RDONLY;
+			}
+			pgs[i]->flags &= ~PG_CLEAN;
 		}
+		uvm_pageactivate(pgs[i]);
 	}
-	if (error) {
-		mutex_enter(&uvm_pageqlock);
-		uvm_page_unbusy(pgs, npages);
-		mutex_exit(&uvm_pageqlock);
-	} else {
-		uvm_page_unbusy(pgs, npages);
-	}
+	mutex_exit(&uvm_pageqlock);
+	uvm_page_unbusy(pgs, npages);
 	mutex_exit(uobj->vmobjlock);
 
 out:
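The behavioral change in the ufs_balloc_range() hunk above can be
summarized with a small userland model: flag updates happen only when
the block allocation succeeded, and in all cases the pages are kept,
activated, and unbusied rather than released.  This is a sketch under
invented simplified types (struct example_page, finish_range,
fully_backed), not the kernel code.

#include <stdbool.h>

#define PG_CLEAN	0x01	/* page has no modified data */
#define PG_RDONLY	0x02	/* page must not be written */
#define PG_BUSY		0x04	/* page is locked by one thread */

struct example_page {
	unsigned int flags;
};

/*
 * Model of the fixed loop: on success, pages fully backed by disk
 * blocks lose PG_RDONLY and all pages are marked dirty; on failure,
 * possibly pre-existing dirty pages are left untouched instead of
 * being released.  Every page is unbusied either way.
 */
static void
finish_range(struct example_page **pgs, int npages, int error,
    bool (*fully_backed)(int))
{
	for (int i = 0; i < npages; i++) {
		if (!error) {
			if (fully_backed(i))
				pgs[i]->flags &= ~PG_RDONLY;
			pgs[i]->flags &= ~PG_CLEAN;	/* mark dirty */
		}
		pgs[i]->flags &= ~PG_BUSY;	/* always unbusy, never free */
	}
}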