The branch main has been updated by dougm:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5b78ff830791633c02a3d906b2c8f5c9b3bb1a91

commit 5b78ff830791633c02a3d906b2c8f5c9b3bb1a91
Author:     Doug Moore <do...@freebsd.org>
AuthorDate: 2024-11-20 17:54:20 +0000
Commit:     Doug Moore <do...@freebsd.org>
CommitDate: 2024-11-20 17:54:20 +0000

    vm_page: remove pages with iterators
    
    Use pctrie iterators for removing some page sequences from radix
    trees, to avoid repeated searches from the tree root.
    
    Rename vm_page_object_remove to vm_page_remove_radixdone, and remove
    from it the responsibility for removing a page from its radix tree,
    and pass that responsibility on to its callers.
    
    For one of those callers, vm_page_rename, pass a pages pctrie_iter,
    rather than a page, and use the iterator to remove the page from its
    radix tree.
    
    Define functions vm_page_iter_remove() and vm_page_iter_free() that
    are like vm_page_remove() and vm_page_free(), respectively, except
    that they take an iterator as parameter rather than a page, and use
    the iterator to remove the page from the radix tree instead of
    searching the radix tree. Function vm_page_iter_free() assumes that
    the page is associated with an object, and calls
    vm_page_free_object_prep to do the part of vm_page_free_prep that is
    object-related.
    
    In functions vm_object_split and vm_object_collapse_scan, use a
    pctrie_iter to walk over the pages of the object, and use
    vm_page_rename and vm_radix_iter_remove to modify the radix tree
    without searching for pages.  In vm_object_page_remove and _kmem_unback, use a
    pctrie_iter and vm_page_iter_free to remove the page from the radix
    tree.
    
    Reviewed by:    markj (previous version)
    Tested by:      pho
    Differential Revision:  https://reviews.freebsd.org/D46724
---
 sys/vm/vm_kern.c   |  12 +++---
 sys/vm/vm_object.c |  72 +++++++++++++++----------------
 sys/vm/vm_page.c   | 124 ++++++++++++++++++++++++++++++++++++++++-------------
 sys/vm/vm_page.h   |   4 +-
 4 files changed, 141 insertions(+), 71 deletions(-)

diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 22776e2196b0..6343fb66cfa3 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -634,8 +634,9 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t 
size, int flags)
 static struct vmem *
 _kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
+       struct pctrie_iter pages;
        struct vmem *arena;
-       vm_page_t m, next;
+       vm_page_t m;
        vm_offset_t end, offset;
        int domain;
 
@@ -648,17 +649,18 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, 
vm_size_t size)
        offset = addr - VM_MIN_KERNEL_ADDRESS;
        end = offset + size;
        VM_OBJECT_WLOCK(object);
-       m = vm_page_lookup(object, atop(offset)); 
+       vm_page_iter_init(&pages, object);
+       m = vm_page_iter_lookup(&pages, atop(offset)); 
        domain = vm_page_domain(m);
        if (__predict_true((m->oflags & VPO_KMEM_EXEC) == 0))
                arena = vm_dom[domain].vmd_kernel_arena;
        else
                arena = vm_dom[domain].vmd_kernel_rwx_arena;
-       for (; offset < end; offset += PAGE_SIZE, m = next) {
-               next = vm_page_next(m);
+       for (; offset < end; offset += PAGE_SIZE,
+           m = vm_page_iter_lookup(&pages, atop(offset))) {
                vm_page_xbusy_claim(m);
                vm_page_unwire_noq(m);
-               vm_page_free(m);
+               vm_page_iter_free(&pages);
        }
        VM_OBJECT_WUNLOCK(object);
 
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index e6324647e29e..21773318cea0 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1520,9 +1520,10 @@ vm_object_shadow(vm_object_t *object, vm_ooffset_t 
*offset, vm_size_t length,
 void
 vm_object_split(vm_map_entry_t entry)
 {
-       vm_page_t m, m_next;
+       struct pctrie_iter pages;
+       vm_page_t m;
        vm_object_t orig_object, new_object, backing_object;
-       vm_pindex_t idx, offidxstart;
+       vm_pindex_t offidxstart;
        vm_size_t size;
 
        orig_object = entry->object.vm_object;
@@ -1573,17 +1574,11 @@ vm_object_split(vm_map_entry_t entry)
         * that the object is in transition.
         */
        vm_object_set_flag(orig_object, OBJ_SPLIT);
-#ifdef INVARIANTS
-       idx = 0;
-#endif
+       vm_page_iter_limit_init(&pages, orig_object, offidxstart + size);
 retry:
-       m = vm_page_find_least(orig_object, offidxstart);
-       KASSERT(m == NULL || idx <= m->pindex - offidxstart,
-           ("%s: object %p was repopulated", __func__, orig_object));
-       for (; m != NULL && (idx = m->pindex - offidxstart) < size;
-           m = m_next) {
-               m_next = TAILQ_NEXT(m, listq);
-
+       pctrie_iter_reset(&pages);
+       for (m = vm_page_iter_lookup_ge(&pages, offidxstart); m != NULL;
+           m = vm_radix_iter_step(&pages)) {
                /*
                 * We must wait for pending I/O to complete before we can
                 * rename the page.
@@ -1604,13 +1599,13 @@ retry:
                 * an incomplete fault.  Just remove and ignore.
                 */
                if (vm_page_none_valid(m)) {
-                       if (vm_page_remove(m))
+                       if (vm_page_iter_remove(&pages))
                                vm_page_free(m);
                        continue;
                }
 
                /* vm_page_rename() will dirty the page. */
-               if (vm_page_rename(m, new_object, idx)) {
+               if (vm_page_rename(&pages, new_object, m->pindex - 
offidxstart)) {
                        vm_page_xunbusy(m);
                        VM_OBJECT_WUNLOCK(new_object);
                        VM_OBJECT_WUNLOCK(orig_object);
@@ -1656,7 +1651,8 @@ retry:
 }
 
 static vm_page_t
-vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p)
+vm_object_collapse_scan_wait(struct pctrie_iter *pages, vm_object_t object,
+    vm_page_t p)
 {
        vm_object_t backing_object;
 
@@ -1683,12 +1679,14 @@ vm_object_collapse_scan_wait(vm_object_t object, 
vm_page_t p)
                VM_OBJECT_WLOCK(object);
        }
        VM_OBJECT_WLOCK(backing_object);
-       return (TAILQ_FIRST(&backing_object->memq));
+       vm_page_iter_init(pages, backing_object);
+       return (vm_page_iter_lookup_ge(pages, 0));
 }
 
 static void
 vm_object_collapse_scan(vm_object_t object)
 {
+       struct pctrie_iter pages;
        vm_object_t backing_object;
        vm_page_t next, p, pp;
        vm_pindex_t backing_offset_index, new_pindex;
@@ -1702,7 +1700,8 @@ vm_object_collapse_scan(vm_object_t object)
        /*
         * Our scan
         */
-       for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) {
+       vm_page_iter_init(&pages, backing_object);
+       for (p = vm_page_iter_lookup_ge(&pages, 0); p != NULL; p = next) {
                next = TAILQ_NEXT(p, listq);
                new_pindex = p->pindex - backing_offset_index;
 
@@ -1710,7 +1709,7 @@ vm_object_collapse_scan(vm_object_t object)
                 * Check for busy page
                 */
                if (vm_page_tryxbusy(p) == 0) {
-                       next = vm_object_collapse_scan_wait(object, p);
+                       next = vm_object_collapse_scan_wait(&pages, object, p);
                        continue;
                }
 
@@ -1727,16 +1726,18 @@ vm_object_collapse_scan(vm_object_t object)
 
                        KASSERT(!pmap_page_is_mapped(p),
                            ("freeing mapped page %p", p));
-                       if (vm_page_remove(p))
+                       if (vm_page_iter_remove(&pages))
                                vm_page_free(p);
+                       next = vm_radix_iter_step(&pages);
                        continue;
                }
 
                if (!vm_page_all_valid(p)) {
                        KASSERT(!pmap_page_is_mapped(p),
                            ("freeing mapped page %p", p));
-                       if (vm_page_remove(p))
+                       if (vm_page_iter_remove(&pages))
                                vm_page_free(p);
+                       next = vm_radix_iter_step(&pages);
                        continue;
                }
 
@@ -1749,7 +1750,7 @@ vm_object_collapse_scan(vm_object_t object)
                         * busy bit owner, we can't tell whether it shadows the
                         * original page.
                         */
-                       next = vm_object_collapse_scan_wait(object, pp);
+                       next = vm_object_collapse_scan_wait(&pages, object, pp);
                        continue;
                }
 
@@ -1775,10 +1776,11 @@ vm_object_collapse_scan(vm_object_t object)
                        vm_pager_freespace(backing_object, p->pindex, 1);
                        KASSERT(!pmap_page_is_mapped(p),
                            ("freeing mapped page %p", p));
-                       if (vm_page_remove(p))
-                               vm_page_free(p);
                        if (pp != NULL)
                                vm_page_xunbusy(pp);
+                       if (vm_page_iter_remove(&pages))
+                               vm_page_free(p);
+                       next = vm_radix_iter_step(&pages);
                        continue;
                }
 
@@ -1789,9 +1791,10 @@ vm_object_collapse_scan(vm_object_t object)
                 * If the page was mapped to a process, it can remain mapped
                 * through the rename.  vm_page_rename() will dirty the page.
                 */
-               if (vm_page_rename(p, object, new_pindex)) {
+               if (vm_page_rename(&pages, object, new_pindex)) {
                        vm_page_xunbusy(p);
-                       next = vm_object_collapse_scan_wait(object, NULL);
+                       next = vm_object_collapse_scan_wait(&pages, object,
+                           NULL);
                        continue;
                }
 
@@ -1807,6 +1810,7 @@ vm_object_collapse_scan(vm_object_t object)
                    backing_offset_index);
 #endif
                vm_page_xunbusy(p);
+               next = vm_radix_iter_step(&pages);
        }
        return;
 }
@@ -1981,7 +1985,8 @@ void
 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
     int options)
 {
-       vm_page_t p, next;
+       struct pctrie_iter pages;
+       vm_page_t p;
 
        VM_OBJECT_ASSERT_WLOCKED(object);
        KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
@@ -1990,16 +1995,11 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t 
start, vm_pindex_t end,
        if (object->resident_page_count == 0)
                return;
        vm_object_pip_add(object, 1);
+       vm_page_iter_limit_init(&pages, object, end);
 again:
-       p = vm_page_find_least(object, start);
-
-       /*
-        * Here, the variable "p" is either (1) the page with the least pindex
-        * greater than or equal to the parameter "start" or (2) NULL. 
-        */
-       for (; p != NULL && (p->pindex < end || end == 0); p = next) {
-               next = TAILQ_NEXT(p, listq);
-
+       pctrie_iter_reset(&pages);
+       for (p = vm_page_iter_lookup_ge(&pages, start); p != NULL;
+            p = vm_radix_iter_step(&pages)) {
                /*
                 * Skip invalid pages if asked to do so.  Try to avoid acquiring
                 * the busy lock, as some consumers rely on this to avoid
@@ -2060,7 +2060,7 @@ wired:
                if ((options & OBJPR_NOTMAPPED) == 0 &&
                    object->ref_count != 0 && !vm_page_try_remove_all(p))
                        goto wired;
-               vm_page_free(p);
+               vm_page_iter_free(&pages);
        }
        vm_object_pip_wakeup(object);
 
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 0b9b55337b52..7d093579e35d 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -170,8 +170,9 @@ static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t 
m,
     vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(vm_page_t m, uint8_t queue);
-static bool vm_page_free_prep(vm_page_t m);
+static bool vm_page_free_prep(vm_page_t m, bool do_remove);
 static void vm_page_free_toq(vm_page_t m);
+static void vm_page_free_toq_impl(vm_page_t m, bool do_remove);
 static void vm_page_init(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
@@ -1386,6 +1387,22 @@ vm_page_free(vm_page_t m)
        vm_page_free_toq(m);
 }
 
+/*
+ *     vm_page_iter_free:
+ *
+ *     Free the current page, as identified by iterator.
+ */
+void
+vm_page_iter_free(struct pctrie_iter *pages)
+{
+       vm_page_t m;
+
+       m = vm_radix_iter_page(pages);
+       vm_radix_iter_remove(pages);
+       m->flags &= ~PG_ZERO;
+       vm_page_free_toq_impl(m, false);
+}
+
 /*
  *     vm_page_free_zero:
  *
@@ -1639,14 +1656,18 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t 
object, vm_page_t mpred)
 }
 
 /*
- * Do the work to remove a page from its object.  The caller is responsible for
- * updating the page's fields to reflect this removal.
+ *     vm_page_remove_radixdone
+ *
+ *     Complete page "m" removal from the specified object after the radix trie
+ *     unhooking.
+ *
+ *     The caller is responsible for updating the page's fields to reflect this
+ *     removal.
  */
 static void
-vm_page_object_remove(vm_page_t m)
+vm_page_remove_radixdone(vm_page_t m)
 {
        vm_object_t object;
-       vm_page_t mrem __diagused;
 
        vm_page_assert_xbusied(m);
        object = m->object;
@@ -1659,10 +1680,7 @@ vm_page_object_remove(vm_page_t m)
                vm_pager_page_unswapped(m);
 
        vm_pager_page_removed(object, m);
-
        m->object = NULL;
-       mrem = vm_radix_remove(&object->rtree, m->pindex);
-       KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
 
        /*
         * Now remove from the object's list of backed pages.
@@ -1704,6 +1722,42 @@ vm_page_remove(vm_page_t m)
        return (dropped);
 }
 
+/*
+ *     vm_page_iter_remove:
+ *
+ *     Remove the current page, as identified by iterator, and remove it from 
the
+ *     radix tree.
+ */
+bool
+vm_page_iter_remove(struct pctrie_iter *pages)
+{
+       vm_page_t m;
+       bool dropped;
+
+       m = vm_radix_iter_page(pages);
+       vm_radix_iter_remove(pages);
+       vm_page_remove_radixdone(m);
+       dropped = (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
+       vm_page_xunbusy(m);
+
+       return (dropped);
+}
+
+/*
+ *     vm_page_radix_remove
+ *
+ *     Removes the specified page from the radix tree.
+ */
+static void
+vm_page_radix_remove(vm_page_t m)
+{
+       vm_page_t mrem __diagused;
+
+       mrem = vm_radix_remove(&m->object->rtree, m->pindex);
+       KASSERT(mrem == m,
+           ("removed page %p, expected page %p", mrem, m));
+}
+
 /*
  *     vm_page_remove_xbusy
  *
@@ -1714,7 +1768,8 @@ bool
 vm_page_remove_xbusy(vm_page_t m)
 {
 
-       vm_page_object_remove(m);
+       vm_page_radix_remove(m);
+       vm_page_remove_radixdone(m);
        return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
 }
 
@@ -1985,8 +2040,8 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, 
vm_pindex_t pindex,
 /*
  *     vm_page_rename:
  *
- *     Move the given memory entry from its
- *     current object to the specified target object/offset.
+ *     Move the current page, as identified by iterator, from its current
+ *     object to the specified target object/offset.
  *
  *     Note: swap associated with the page must be invalidated by the move.  We
  *           have to do this for several reasons:  (1) we aren't freeing the
@@ -2001,13 +2056,15 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, 
vm_pindex_t pindex,
  *     The objects must be locked.
  */
 int
-vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
+vm_page_rename(struct pctrie_iter *pages,
+    vm_object_t new_object, vm_pindex_t new_pindex)
 {
-       vm_page_t mpred;
+       vm_page_t m, mpred;
        vm_pindex_t opidx;
 
        VM_OBJECT_ASSERT_WLOCKED(new_object);
 
+       m = vm_radix_iter_page(pages);
        KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m));
 
        /*
@@ -2027,7 +2084,8 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, 
vm_pindex_t new_pindex)
         * the listq iterator is tainted.
         */
        m->pindex = opidx;
-       vm_page_object_remove(m);
+       vm_radix_iter_remove(pages);
+       vm_page_remove_radixdone(m);
 
        /* Return back to the new pindex to complete vm_page_insert(). */
        m->pindex = new_pindex;
@@ -3122,7 +3180,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long 
npages, vm_page_t m_run,
                                        vm_page_dequeue(m);
                                        if (vm_page_replace_hold(m_new, object,
                                            m->pindex, m) &&
-                                           vm_page_free_prep(m))
+                                           vm_page_free_prep(m, true))
                                                SLIST_INSERT_HEAD(&free, m,
                                                    plinks.s.ss);
 
@@ -3134,7 +3192,7 @@ vm_page_reclaim_run(int req_class, int domain, u_long 
npages, vm_page_t m_run,
                                } else {
                                        m->flags &= ~PG_ZERO;
                                        vm_page_dequeue(m);
-                                       if (vm_page_free_prep(m))
+                                       if (vm_page_free_prep(m, true))
                                                SLIST_INSERT_HEAD(&free, m,
                                                    plinks.s.ss);
                                        KASSERT(m->dirty == 0,
@@ -4073,7 +4131,7 @@ vm_page_enqueue(vm_page_t m, uint8_t queue)
  *     page must be unmapped.
  */
 static bool
-vm_page_free_prep(vm_page_t m)
+vm_page_free_prep(vm_page_t m, bool do_remove)
 {
 
        /*
@@ -4120,7 +4178,9 @@ vm_page_free_prep(vm_page_t m)
                    m->ref_count == VPRC_OBJREF,
                    ("vm_page_free_prep: page %p has unexpected ref_count %u",
                    m, m->ref_count));
-               vm_page_object_remove(m);
+               if (do_remove)
+                       vm_page_radix_remove(m);
+               vm_page_remove_radixdone(m);
                m->ref_count -= VPRC_OBJREF;
        } else
                vm_page_assert_unbusied(m);
@@ -4172,22 +4232,13 @@ vm_page_free_prep(vm_page_t m)
        return (true);
 }
 
-/*
- *     vm_page_free_toq:
- *
- *     Returns the given page to the free list, disassociating it
- *     from any VM object.
- *
- *     The object must be locked.  The page must be exclusively busied if it
- *     belongs to an object.
- */
 static void
-vm_page_free_toq(vm_page_t m)
+vm_page_free_toq_impl(vm_page_t m, bool do_remove)
 {
        struct vm_domain *vmd;
        uma_zone_t zone;
 
-       if (!vm_page_free_prep(m))
+       if (!vm_page_free_prep(m, do_remove))
                return;
 
        vmd = vm_pagequeue_domain(m);
@@ -4202,6 +4253,21 @@ vm_page_free_toq(vm_page_t m)
        vm_domain_freecnt_inc(vmd, 1);
 }
 
+/*
+ *     vm_page_free_toq:
+ *
+ *     Returns the given page to the free list, disassociating it
+ *     from any VM object.
+ *
+ *     The object must be locked.  The page must be exclusively busied if it
+ *     belongs to an object.
+ */
+static void
+vm_page_free_toq(vm_page_t m)
+{
+       vm_page_free_toq_impl(m, true);
+}
+
 /*
  *     vm_page_free_pages_toq:
  *
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 893608bcacf1..613896e77dd9 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -602,6 +602,7 @@ bool vm_page_busy_sleep(vm_page_t m, const char *msg, int 
allocflags);
 void vm_page_busy_sleep_unlocked(vm_object_t obj, vm_page_t m,
     vm_pindex_t pindex, const char *wmesg, int allocflags);
 void vm_page_free(vm_page_t m);
+void vm_page_iter_free(struct pctrie_iter *);
 void vm_page_free_zero(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
@@ -679,8 +680,9 @@ void vm_page_release(vm_page_t m, int flags);
 void vm_page_release_locked(vm_page_t m, int flags);
 vm_page_t vm_page_relookup(vm_object_t, vm_pindex_t);
 bool vm_page_remove(vm_page_t);
+bool vm_page_iter_remove(struct pctrie_iter *);
 bool vm_page_remove_xbusy(vm_page_t);
-int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
+int vm_page_rename(struct pctrie_iter *, vm_object_t, vm_pindex_t);
 void vm_page_replace(vm_page_t mnew, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mold);
 int vm_page_sbusied(vm_page_t m);

Reply via email to