The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=149674bbac5842ac883414a6c1e75d829c70d42b

commit 149674bbac5842ac883414a6c1e75d829c70d42b
Author:     Konstantin Belousov <k...@freebsd.org>
AuthorDate: 2025-07-23 10:44:29 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2025-09-14 20:00:36 +0000

    vm_fault: try to only share-busy page for soft faults
    
    If the fault handler found a vaild page that is definitely not going to be
    renamed for COW, try to only sbusy the page.  We do not need to validate
    the page, and parallel faults on the same address are excluded by the
    xbusy state of the page from the top object.
    
    Reviewed by:    alc, markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D51474
---
 sys/vm/vm_fault.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 15 deletions(-)

diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 27b5f72ec945..427c18c63eb7 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -202,7 +202,10 @@ vm_fault_page_release(vm_page_t *mp)
                 * pageout while optimizing fault restarts.
                 */
                vm_page_deactivate(m);
-               vm_page_xunbusy(m);
+               if (vm_page_xbusied(m))
+                       vm_page_xunbusy(m);
+               else
+                       vm_page_sunbusy(m);
                *mp = NULL;
        }
 }
@@ -333,6 +336,13 @@ vm_fault_dirty(struct faultstate *fs, vm_page_t m)
 
 }
 
+static bool
+vm_fault_is_read(const struct faultstate *fs)
+{
+       return ((fs->prot & VM_PROT_WRITE) == 0 &&
+           (fs->fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) == 0);
+}
+
 /*
  * Unlocks fs.first_object and fs.map on success.
  */
@@ -1019,7 +1029,7 @@ vm_fault_can_cow_rename(struct faultstate *fs)
 static void
 vm_fault_cow(struct faultstate *fs)
 {
-       bool is_first_object_locked;
+       bool is_first_object_locked, rename_cow;
 
        KASSERT(vm_fault_might_be_cow(fs),
            ("source and target COW objects are identical"));
@@ -1033,13 +1043,29 @@ vm_fault_cow(struct faultstate *fs)
         * object so that it will go out to swap when needed.
         */
        is_first_object_locked = false;
-       if (vm_fault_can_cow_rename(fs) &&
-           /*
-            * We don't chase down the shadow chain and we can acquire locks.
-            */
-           (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object)) &&
-           fs->object == fs->first_object->backing_object &&
-           VM_OBJECT_TRYWLOCK(fs->object)) {
+       rename_cow = false;
+
+       if (vm_fault_can_cow_rename(fs) && vm_page_xbusied(fs->m)) {
+               /*
+                * Check that we don't chase down the shadow chain and
+                * we can acquire locks.  Recheck the conditions for
+                * rename after the shadow chain is stable after the
+                * object locking.
+                */
+               is_first_object_locked = VM_OBJECT_TRYWLOCK(fs->first_object);
+               if (is_first_object_locked &&
+                   fs->object == fs->first_object->backing_object) {
+                       if (VM_OBJECT_TRYWLOCK(fs->object)) {
+                               rename_cow = vm_fault_can_cow_rename(fs);
+                               if (!rename_cow)
+                                       VM_OBJECT_WUNLOCK(fs->object);
+                       }
+               }
+       }
+
+       if (rename_cow) {
+               vm_page_assert_xbusied(fs->m);
+
                /*
                 * Remove but keep xbusy for replace.  fs->m is moved into
                 * fs->first_object and left busy while fs->first_m is
@@ -1096,8 +1122,12 @@ vm_fault_cow(struct faultstate *fs)
                 * address space.  If OBJ_ONEMAPPING is set after the check,
                 * removing mappings will at worse trigger some unnecessary page
                 * faults.
+                *
+                * In the fs->m shared busy case, the xbusy state of
+                * fs->first_m prevents new mappings of fs->m from
+                * being created because a parallel fault on this
+                * shadow chain should wait for xbusy on fs->first_m.
                 */
-               vm_page_assert_xbusied(fs->m_cow);
                if ((fs->first_object->flags & OBJ_ONEMAPPING) == 0)
                        pmap_remove_all(fs->m_cow);
        }
@@ -1493,6 +1523,51 @@ vm_fault_object(struct faultstate *fs, int *behindp, int 
*aheadp)
        vm_page_iter_init(&pages, fs->object);
        fs->m = vm_radix_iter_lookup(&pages, fs->pindex);
        if (fs->m != NULL) {
+               /*
+                * If the found page is valid, will be either shadowed
+                * or mapped read-only, and will not be renamed for
+                * COW, then busy it in shared mode.  This allows
+                * other faults needing this page to proceed in
+                * parallel.
+                *
+                * Unlocked check for validity, rechecked after busy
+                * is obtained.
+                */
+               if (vm_page_all_valid(fs->m) &&
+                   /*
+                    * No write permissions for the new fs->m mapping,
+                    * or the first object has only one mapping, so
+                    * other writeable COW mappings of fs->m cannot
+                    * appear under us.
+                    */
+                   (vm_fault_is_read(fs) || vm_fault_might_be_cow(fs)) &&
+                   /*
+                    * fs->m cannot be renamed from object to
+                    * first_object.  These conditions will be
+                    * re-checked with proper synchronization in
+                    * vm_fault_cow().
+                    */
+                   (!vm_fault_can_cow_rename(fs) ||
+                   fs->object != fs->first_object->backing_object)) {
+                       if (!vm_page_trysbusy(fs->m)) {
+                               vm_fault_busy_sleep(fs, VM_ALLOC_SBUSY);
+                               return (FAULT_RESTART);
+                       }
+
+                       /*
+                        * Now make sure that racily checked
+                        * conditions are still valid.
+                        */
+                       if (__predict_true(vm_page_all_valid(fs->m) &&
+                           (vm_fault_is_read(fs) ||
+                           vm_fault_might_be_cow(fs)))) {
+                               VM_OBJECT_UNLOCK(fs->object);
+                               return (FAULT_SOFT);
+                       }
+
+                       vm_page_sunbusy(fs->m);
+               }
+
                if (!vm_page_tryxbusy(fs->m)) {
                        vm_fault_busy_sleep(fs, 0);
                        return (FAULT_RESTART);
@@ -1707,10 +1782,10 @@ RetryFault:
 
 found:
        /*
-        * A valid page has been found and exclusively busied.  The
-        * object lock must no longer be held.
+        * A valid page has been found and busied.  The object lock
+        * must no longer be held if the page was busied.
         */
-       vm_page_assert_xbusied(fs.m);
+       vm_page_assert_busied(fs.m);
        VM_OBJECT_ASSERT_UNLOCKED(fs.object);
 
        /*
@@ -1779,7 +1854,7 @@ found:
         * Page must be completely valid or it is not fit to
         * map into user space.  vm_pager_get_pages() ensures this.
         */
-       vm_page_assert_xbusied(fs.m);
+       vm_page_assert_busied(fs.m);
        KASSERT(vm_page_all_valid(fs.m),
            ("vm_fault: page %p partially invalid", fs.m));
 
@@ -1811,7 +1886,10 @@ found:
                (*fs.m_hold) = fs.m;
                vm_page_wire(fs.m);
        }
-       vm_page_xunbusy(fs.m);
+       if (vm_page_xbusied(fs.m))
+               vm_page_xunbusy(fs.m);
+       else
+               vm_page_sunbusy(fs.m);
        fs.m = NULL;
 
        /*

Reply via email to