Author: kib
Date: Thu Mar 15 11:06:37 2012
New Revision: 233001
URL: http://svn.freebsd.org/changeset/base/233001

Log:
  MFC r232071:
  Account for writeable shared mappings backed by a file in the vnode's
  v_writecount.
  
  MFC r232103:
  Place the if() at the right location.
  
  MFC note: the added struct vm_object un_pager.vnp.writemappings member
  is placed after the fields of struct vm_object that may be accessed by
  modules, preserving the module ABI.
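
  For illustration, here is a minimal userland model of the accounting this
  change introduces (a sketch only, not the committed code; the real logic
  is in the vnode_pager.c hunk below).  The object counts mapped writable
  bytes in writemappings, and the vnode's v_writecount changes only when
  that count crosses zero in either direction:

	/*
	 * Illustrative model of the writemappings/v_writecount
	 * accounting.  The field names mirror the kernel's, but this is
	 * standalone userland code, not the committed change.
	 */
	#include <assert.h>
	#include <stdio.h>

	struct vnode { int v_writecount; };
	struct object {
		struct vnode *handle;
		long long writemappings;	/* mapped writable bytes */
	};

	/* Add (or, with start > end, subtract) a range of mapped bytes. */
	static void
	update_writecount(struct object *obj, long long start, long long end)
	{
		long long old_wm = obj->writemappings;

		obj->writemappings += end - start;
		if (old_wm == 0 && obj->writemappings != 0)
			obj->handle->v_writecount++;	/* first writable mapping */
		else if (old_wm != 0 && obj->writemappings == 0)
			obj->handle->v_writecount--;	/* last one went away */
	}

	int
	main(void)
	{
		struct vnode vp = { 0 };
		struct object obj = { &vp, 0 };

		update_writecount(&obj, 0, 4096);	/* first mmap: 0 -> 1 */
		update_writecount(&obj, 0, 8192);	/* second mapping: stays 1 */
		assert(vp.v_writecount == 1);
		update_writecount(&obj, 8192, 0);	/* unmap, swapped args */
		update_writecount(&obj, 4096, 0);	/* last unmap: 1 -> 0 */
		assert(vp.v_writecount == 0);
		printf("v_writecount transitions OK\n");
		return (0);
	}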

Modified:
  stable/9/sys/vm/vm_map.c
  stable/9/sys/vm/vm_map.h
  stable/9/sys/vm/vm_mmap.c
  stable/9/sys/vm/vm_object.h
  stable/9/sys/vm/vnode_pager.c
  stable/9/sys/vm/vnode_pager.h
Directory Properties:
  stable/9/sys/   (props changed)

Modified: stable/9/sys/vm/vm_map.c
==============================================================================
--- stable/9/sys/vm/vm_map.c    Thu Mar 15 08:39:10 2012        (r233000)
+++ stable/9/sys/vm/vm_map.c    Thu Mar 15 11:06:37 2012        (r233001)
@@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
+#include <vm/vnode_pager.h>
 #include <vm/swap_pager.h>
 #include <vm/uma.h>
 
@@ -475,11 +476,23 @@ vm_map_process_deferred(void)
 {
        struct thread *td;
        vm_map_entry_t entry;
+       vm_object_t object;
 
        td = curthread;
-
        while ((entry = td->td_map_def_user) != NULL) {
                td->td_map_def_user = entry->next;
+               if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
+                       /*
+                        * Decrement the object's writemappings and
+                        * possibly the vnode's v_writecount.
+                        */
+                       KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
+                           ("Submap with writecount"));
+                       object = entry->object.vm_object;
+                       KASSERT(object != NULL, ("No object for writecount"));
+                       vnode_pager_release_writecount(object, entry->start,
+                           entry->end);
+               }
                vm_map_entry_deallocate(entry, FALSE);
        }
 }
@@ -1174,6 +1187,8 @@ vm_map_insert(vm_map_t map, vm_object_t 
                protoeflags |= MAP_ENTRY_NOSYNC;
        if (cow & MAP_DISABLE_COREDUMP)
                protoeflags |= MAP_ENTRY_NOCOREDUMP;
+       if (cow & MAP_VN_WRITECOUNT)
+               protoeflags |= MAP_ENTRY_VN_WRITECNT;
        if (cow & MAP_INHERIT_SHARE)
                inheritance = VM_INHERIT_SHARE;
        else
@@ -1516,6 +1531,11 @@ vm_map_simplify_entry(vm_map_t map, vm_m
                         * references.  Thus, the map lock can be kept
                         * without causing a lock-order reversal with
                         * the vnode lock.
+                        *
+                        * Since we count the number of virtual page
+                        * mappings in object->un_pager.vnp.writemappings,
+                        * the writemappings value should not be adjusted
+                        * when the entry is disposed of.
                         */
                        if (prev->object.vm_object)
                                vm_object_deallocate(prev->object.vm_object);
@@ -1627,6 +1647,13 @@ _vm_map_clip_start(vm_map_t map, vm_map_
 
        if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
                vm_object_reference(new_entry->object.vm_object);
+               /*
+                * The object->un_pager.vnp.writemappings for the
+                * object of a MAP_ENTRY_VN_WRITECNT entry shall be
+                * kept as is here.  The virtual pages are
+                * re-distributed among the clipped entries, so the sum is
+                * left the same.
+                */
        }
 }
 
@@ -2900,6 +2927,7 @@ vm_map_copy_entry(
        vm_ooffset_t *fork_charge)
 {
        vm_object_t src_object;
+       vm_map_entry_t fake_entry;
        vm_offset_t size;
        struct ucred *cred;
        int charged;
@@ -2965,6 +2993,27 @@ vm_map_copy_entry(
                        src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
                        dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
                        dst_entry->offset = src_entry->offset;
+                       if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
+                               /*
+                                * MAP_ENTRY_VN_WRITECNT cannot
+                                * indicate write reference from
+                                * src_entry, since the entry is
+                                * marked as needs copy.  Allocate a
+                                * fake entry that is used to
+                                * decrement object->un_pager.vnp.writemappings
+                                * at the appropriate time.  Attach
+                                * fake_entry to the deferred list.
+                                */
+                               fake_entry = vm_map_entry_create(dst_map);
+                               fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
+                               src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
+                               vm_object_reference(src_object);
+                               fake_entry->object.vm_object = src_object;
+                               fake_entry->start = src_entry->start;
+                               fake_entry->end = src_entry->end;
+                               fake_entry->next = curthread->td_map_def_user;
+                               curthread->td_map_def_user = fake_entry;
+                       }
                } else {
                        dst_entry->object.vm_object = NULL;
                        dst_entry->offset = 0;
@@ -3043,6 +3092,7 @@ vmspace_fork(struct vmspace *vm1, vm_oof
        vm_map_lock(old_map);
        if (old_map->busy)
                vm_map_wait_busy(old_map);
+       new_map = NULL; /* silence gcc */
        vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
        if (vm2 == NULL)
                goto unlock_and_return;
@@ -3122,6 +3172,16 @@ vmspace_fork(struct vmspace *vm1, vm_oof
                        new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
                            MAP_ENTRY_IN_TRANSITION);
                        new_entry->wired_count = 0;
+                       if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
+                               object = new_entry->object.vm_object;
+                               KASSERT(((struct vnode *)object->handle)->
+                                   v_writecount > 0,
+                                   ("vmspace_fork: v_writecount"));
+                               KASSERT(object->un_pager.vnp.writemappings > 0,
+                                   ("vmspace_fork: vnp.writecount"));
+                               vnode_pager_update_writecount(object,
+                                   new_entry->start, new_entry->end);
+                       }
 
                        /*
                         * Insert the entry into the new map -- we know we're
@@ -3146,8 +3206,11 @@ vmspace_fork(struct vmspace *vm1, vm_oof
                         */
                        new_entry = vm_map_entry_create(new_map);
                        *new_entry = *old_entry;
+                       /*
+                        * Copied entry is COW over the old object.
+                        */
                        new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
-                           MAP_ENTRY_IN_TRANSITION);
+                           MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
                        new_entry->wired_count = 0;
                        new_entry->object.vm_object = NULL;
                        new_entry->cred = NULL;
@@ -3161,9 +3224,15 @@ vmspace_fork(struct vmspace *vm1, vm_oof
                old_entry = old_entry->next;
        }
 unlock_and_return:
-       vm_map_unlock(old_map);
+       /*
+        * Use inlined vm_map_unlock() to postpone handling the deferred
+        * map entries, which cannot be done until both old_map and
+        * new_map locks are released.
+        */
+       sx_xunlock(&old_map->lock);
        if (vm2 != NULL)
-               vm_map_unlock(new_map);
+               sx_xunlock(&new_map->lock);
+       vm_map_process_deferred();
 
        return (vm2);
 }
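
For readers following the fake-entry logic above: the decrement is queued
instead of done in place because the map lock is held, and taking the vnode
lock under it would invert the established lock order.  A minimal
standalone model of the deferral pattern (illustrative only, not kernel
code) looks like this:

	#include <stddef.h>
	#include <stdio.h>

	struct def_entry {
		struct def_entry *next;
		const char *what;
	};

	/* Per-thread (td_map_def_user) in the kernel; a global here. */
	static struct def_entry *td_map_def_user;

	/*
	 * Called with the "map lock" held: only queue the work.  Taking
	 * the vnode lock here could deadlock against a thread that holds
	 * the vnode lock and waits for the map lock.
	 */
	static void
	defer(struct def_entry *e)
	{
		e->next = td_map_def_user;
		td_map_def_user = e;
	}

	/* Called once every map lock is dropped: vnode locks are safe now. */
	static void
	process_deferred(void)
	{
		struct def_entry *e;

		while ((e = td_map_def_user) != NULL) {
			td_map_def_user = e->next;
			printf("release writemappings for %s\n", e->what);
		}
	}

	int
	main(void)
	{
		struct def_entry fake = { NULL, "fake_entry" };

		defer(&fake);		/* while "locked" */
		process_deferred();	/* after "unlock" */
		return (0);
	}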

Modified: stable/9/sys/vm/vm_map.h
==============================================================================
--- stable/9/sys/vm/vm_map.h    Thu Mar 15 08:39:10 2012        (r233000)
+++ stable/9/sys/vm/vm_map.h    Thu Mar 15 11:06:37 2012        (r233001)
@@ -139,6 +139,7 @@ struct vm_map_entry {
 #define        MAP_ENTRY_GROWS_UP              0x2000  /* Bottom-up stacks */
 
 #define        MAP_ENTRY_WIRE_SKIPPED          0x4000
+#define        MAP_ENTRY_VN_WRITECNT           0x8000  /* writeable vnode mapping */
 
 #ifdef _KERNEL
 static __inline u_char
@@ -315,6 +316,7 @@ long vmspace_wired_count(struct vmspace 
 #define MAP_DISABLE_SYNCER     0x0020
 #define MAP_DISABLE_COREDUMP   0x0100
 #define MAP_PREFAULT_MADVISE   0x0200  /* from (user) madvise request */
+#define        MAP_VN_WRITECOUNT       0x0400
 #define        MAP_STACK_GROWS_DOWN    0x1000
 #define        MAP_STACK_GROWS_UP      0x2000
 #define        MAP_ACC_CHARGED         0x4000

Modified: stable/9/sys/vm/vm_mmap.c
==============================================================================
--- stable/9/sys/vm/vm_mmap.c   Thu Mar 15 08:39:10 2012        (r233000)
+++ stable/9/sys/vm/vm_mmap.c   Thu Mar 15 11:06:37 2012        (r233001)
@@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_pageout.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
 
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
@@ -93,7 +94,7 @@ struct sbrk_args {
 #endif
 
 static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
-    int *, struct vnode *, vm_ooffset_t *, vm_object_t *);
+    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
 static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
     int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
 static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
@@ -1218,28 +1219,33 @@ sys_munlock(td, uap)
 /*
  * vm_mmap_vnode()
  *
- * MPSAFE
- *
  * Helper function for vm_mmap.  Perform sanity check specific for mmap
  * operations on vnodes.
+ *
+ * For VCHR vnodes, the vnode lock is held over the call to
+ * vm_mmap_cdev() to keep vp->v_rdev valid.
  */
 int
 vm_mmap_vnode(struct thread *td, vm_size_t objsize,
     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
-    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp)
+    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
+    boolean_t *writecounted)
 {
        struct vattr va;
        vm_object_t obj;
        vm_offset_t foff;
        struct mount *mp;
        struct ucred *cred;
-       int error, flags;
-       int vfslocked;
+       int error, flags, locktype, vfslocked;
 
        mp = vp->v_mount;
        cred = td->td_ucred;
+       if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
+               locktype = LK_EXCLUSIVE;
+       else
+               locktype = LK_SHARED;
        vfslocked = VFS_LOCK_GIANT(mp);
-       if ((error = vget(vp, LK_SHARED, td)) != 0) {
+       if ((error = vget(vp, locktype, td)) != 0) {
                VFS_UNLOCK_GIANT(vfslocked);
                return (error);
        }
@@ -1256,8 +1262,20 @@ vm_mmap_vnode(struct thread *td, vm_size
                }
                if (obj->handle != vp) {
                        vput(vp);
-                       vp = (struct vnode*)obj->handle;
-                       vget(vp, LK_SHARED, td);
+                       vp = (struct vnode *)obj->handle;
+                       /*
+                        * Bypass filesystems obey the mpsafety of the
+                        * underlying fs.
+                        */
+                       error = vget(vp, locktype, td);
+                       if (error != 0) {
+                               VFS_UNLOCK_GIANT(vfslocked);
+                               return (error);
+                       }
+               }
+               if (locktype == LK_EXCLUSIVE) {
+                       *writecounted = TRUE;
+                       vnode_pager_update_writecount(obj, 0, objsize);
                }
        } else if (vp->v_type == VCHR) {
                error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
@@ -1293,7 +1311,7 @@ vm_mmap_vnode(struct thread *td, vm_size
        objsize = round_page(va.va_size);
        if (va.va_nlink == 0)
                flags |= MAP_NOSYNC;
-       obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, td->td_ucred);
+       obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
        if (obj == NULL) {
                error = ENOMEM;
                goto done;
@@ -1432,6 +1450,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
        int rv = KERN_SUCCESS;
        int docow, error;
        struct thread *td = curthread;
+       boolean_t writecounted;
 
        if (size == 0)
                return (0);
@@ -1470,6 +1489,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
                        return (EINVAL);
                fitit = FALSE;
        }
+       writecounted = FALSE;
+
        /*
         * Lookup/allocate object.
         */
@@ -1480,7 +1501,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
                break;
        case OBJT_VNODE:
                error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
-                   handle, &foff, &object);
+                   handle, &foff, &object, &writecounted);
                break;
        case OBJT_SWAP:
                error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
@@ -1520,6 +1541,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
        /* Shared memory is also shared with children. */
        if (flags & MAP_SHARED)
                docow |= MAP_INHERIT_SHARE;
+       if (writecounted)
+               docow |= MAP_VN_WRITECOUNT;
 
        if (flags & MAP_STACK)
                rv = vm_map_stack(map, *addr, size, prot, maxprot,
@@ -1537,7 +1560,12 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
                 * Lose the object reference. Will destroy the
                 * object if it's an unnamed anonymous mapping
                 * or named anonymous without other references.
+                *
+                * If this mapping was accounted for in the vnode's
+                * writecount, then undo that now.
                 */
+               if (writecounted)
+                       vnode_pager_release_writecount(object, 0, size);
                vm_object_deallocate(object);
        }
 

Modified: stable/9/sys/vm/vm_object.h
==============================================================================
--- stable/9/sys/vm/vm_object.h Thu Mar 15 08:39:10 2012        (r233000)
+++ stable/9/sys/vm/vm_object.h Thu Mar 15 11:06:37 2012        (r233001)
@@ -112,6 +112,7 @@ struct vm_object {
                 */
                struct {
                        off_t vnp_size;
+                       vm_ooffset_t writemappings;
                } vnp;
 
                /*

Modified: stable/9/sys/vm/vnode_pager.c
==============================================================================
--- stable/9/sys/vm/vnode_pager.c       Thu Mar 15 08:39:10 2012        (r233000)
+++ stable/9/sys/vm/vnode_pager.c       Thu Mar 15 11:06:37 2012        (r233001)
@@ -222,6 +222,7 @@ retry:
                object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
 
                object->un_pager.vnp.vnp_size = size;
+               object->un_pager.vnp.writemappings = 0;
 
                object->handle = handle;
                VI_LOCK(vp);
@@ -268,10 +269,16 @@ vnode_pager_dealloc(object)
                wakeup(object);
        }
        ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
+       if (object->un_pager.vnp.writemappings > 0) {
+               object->un_pager.vnp.writemappings = 0;
+               vp->v_writecount--;
+       }
        vp->v_object = NULL;
        vp->v_vflag &= ~VV_TEXT;
+       VM_OBJECT_UNLOCK(object);
        while (refs-- > 0)
                vunref(vp);
+       VM_OBJECT_LOCK(object);
 }
 
 static boolean_t
@@ -1215,3 +1222,81 @@ vnode_pager_undirty_pages(vm_page_t *ma,
        }
        VM_OBJECT_UNLOCK(obj);
 }
+
+void
+vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
+    vm_offset_t end)
+{
+       struct vnode *vp;
+       vm_ooffset_t old_wm;
+
+       VM_OBJECT_LOCK(object);
+       if (object->type != OBJT_VNODE) {
+               VM_OBJECT_UNLOCK(object);
+               return;
+       }
+       old_wm = object->un_pager.vnp.writemappings;
+       object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start;
+       vp = object->handle;
+       if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) {
+               ASSERT_VOP_ELOCKED(vp, "v_writecount inc");
+               vp->v_writecount++;
+       } else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) {
+               ASSERT_VOP_ELOCKED(vp, "v_writecount dec");
+               vp->v_writecount--;
+       }
+       VM_OBJECT_UNLOCK(object);
+}
+
+void
+vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
+    vm_offset_t end)
+{
+       struct vnode *vp;
+       struct mount *mp;
+       vm_offset_t inc;
+       int vfslocked;
+
+       VM_OBJECT_LOCK(object);
+
+       /*
+        * First, recheck the object type to account for the race when
+        * the vnode is reclaimed.
+        */
+       if (object->type != OBJT_VNODE) {
+               VM_OBJECT_UNLOCK(object);
+               return;
+       }
+
+       /*
+        * Optimize for the case when writemappings is not going to
+        * zero.
+        */
+       inc = end - start;
+       if (object->un_pager.vnp.writemappings != inc) {
+               object->un_pager.vnp.writemappings -= inc;
+               VM_OBJECT_UNLOCK(object);
+               return;
+       }
+
+       vp = object->handle;
+       vhold(vp);
+       VM_OBJECT_UNLOCK(object);
+       vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+       mp = NULL;
+       vn_start_write(vp, &mp, V_WAIT);
+       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+
+       /*
+        * Decrement the object's writemappings, by swapping the start
+        * and end arguments for vnode_pager_update_writecount().  If
+        * there was not a race with vnode reclamation, then the
+        * vnode's v_writecount is decremented.
+        */
+       vnode_pager_update_writecount(object, end, start);
+       VOP_UNLOCK(vp, 0);
+       vdrop(vp);
+       if (mp != NULL)
+               vn_finished_write(mp);
+       VFS_UNLOCK_GIANT(vfslocked);
+}

Modified: stable/9/sys/vm/vnode_pager.h
==============================================================================
--- stable/9/sys/vm/vnode_pager.h       Thu Mar 15 08:39:10 2012        (r233000)
+++ stable/9/sys/vm/vnode_pager.h       Thu Mar 15 11:06:37 2012        (r233001)
@@ -46,7 +46,11 @@ int vnode_pager_generic_putpages(struct 
                                          int count, boolean_t sync,
                                          int *rtvals);
 
+void vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
+    vm_offset_t end);
 void vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written);
+void vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
+    vm_offset_t end);
 
 #endif                         /* _KERNEL */
 #endif                         /* _VNODE_PAGER_ */
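
A user-visible consequence worth noting (the demonstration below is
hypothetical and not part of the commit): with this change, a writable
MAP_SHARED file mapping holds the vnode's v_writecount even after the file
descriptor is closed, so execve() of the mapped file is expected to fail
with ETXTBSY until the mapping goes away.  A sketch, assuming a scratch
path and a stock POSIX environment:

	#include <sys/mman.h>
	#include <err.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		const char *path = "/tmp/wctest";	/* hypothetical scratch file */
		char *argv[] = { (char *)"/tmp/wctest", NULL };
		char *p;
		int fd;

		/* Any executable content will do; copy /bin/true. */
		if (system("cp /bin/true /tmp/wctest && chmod 755 /tmp/wctest") != 0)
			errx(1, "setup failed");
		fd = open(path, O_RDWR);
		if (fd == -1)
			err(1, "open");
		p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			err(1, "mmap");
		close(fd);	/* the mapping alone now holds v_writecount */

		/*
		 * Expected to fail with ETXTBSY on kernels with this change;
		 * on older kernels it would simply exec the copy of true.
		 */
		execv(path, argv);
		printf("execv: %s\n", strerror(errno));

		munmap(p, getpagesize());
		return (0);
	}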