It's possible to allocate all pages in a segment (and higher priority
ones) to privileged threads. This can result in a panic even though
memory is available in lower priority segments which haven't yet been
rebalanced by the pageout daemon. Rebalancing is now done actively
during privileged page allocation.
---
 vm/vm_page.c     | 31 +++++++++++++++++++++++--------
 vm/vm_page.h     |  3 +++
 vm/vm_resident.c |  4 ----
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/vm/vm_page.c b/vm/vm_page.c
index 5cdf0c7b..a656aa01 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -990,7 +990,8 @@ vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
  */
 static boolean_t
 vm_page_seg_balance_page(struct vm_page_seg *seg,
-                         struct vm_page_seg *remote_seg)
+                         struct vm_page_seg *remote_seg,
+                         boolean_t priv_alloc)
 {
     struct vm_page *src, *dest;
     vm_object_t object;
@@ -1002,7 +1003,9 @@ vm_page_seg_balance_page(struct vm_page_seg *seg,
     vm_page_seg_double_lock(seg, remote_seg);
 
     if (vm_page_seg_usable(seg)
-        || !vm_page_seg_page_available(remote_seg)) {
+        || (priv_alloc
+            ? remote_seg->nr_free_pages == 0
+            : !vm_page_seg_page_available(remote_seg))) {
         goto error;
     }
 
@@ -1082,7 +1085,7 @@ error:
 }
 
 static boolean_t
-vm_page_seg_balance(struct vm_page_seg *seg)
+vm_page_seg_balance(struct vm_page_seg *seg, boolean_t priv_alloc)
 {
     struct vm_page_seg *remote_seg;
     unsigned int i;
@@ -1100,7 +1103,7 @@ vm_page_seg_balance(struct vm_page_seg *seg)
             continue;
         }
 
-        balanced = vm_page_seg_balance_page(seg, remote_seg);
+        balanced = vm_page_seg_balance_page(seg, remote_seg, priv_alloc);
 
         if (balanced) {
             return TRUE;
@@ -1611,16 +1614,28 @@ vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
     struct vm_page *page;
     unsigned int i;
 
-    for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
+    const unsigned int seg_index = vm_page_select_alloc_seg(selector);
+
+retry:
+    simple_lock(&vm_page_queue_free_lock);
+
+    for (i = seg_index; i < vm_page_segs_size; i--) {
         page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
 
         if (page != NULL)
             return page;
     }
 
-    /* FIXME: rebalance segments? */
     if (!current_thread() || current_thread()->vm_privilege)
-        panic("vm_page: privileged thread unable to allocate page");
+    {
+        simple_unlock(&vm_page_queue_free_lock);
+
+        for (i = seg_index; i < vm_page_segs_size; i--)
+            if (vm_page_seg_balance(vm_page_seg_get(i), TRUE))
+                goto retry;
+
+        panic("vm_page: privileged thread unable to allocate page");
+    }
 
     return NULL;
 }
@@ -1989,7 +2004,7 @@ vm_page_balance_once(void)
      */
 
     for (i = 0; i < vm_page_segs_size; i++) {
-        balanced = vm_page_seg_balance(vm_page_seg_get(i));
+        balanced = vm_page_seg_balance(vm_page_seg_get(i), FALSE);
 
         if (balanced) {
             return TRUE;
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 9e110209..49b5e602 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -461,6 +461,9 @@ struct vm_page * vm_page_lookup_pa(phys_addr_t pa);
  * The selector is used to determine the segments from which allocation can
  * be attempted.
  *
+ * vm_page_queue_free_lock must be unlocked when this function is called;
+ * it is always held on return, whether or not a page was allocated.
+ *
  * This function should only be used by the vm_resident module.
  */
 struct vm_page * vm_page_alloc_pa(unsigned int order, unsigned int selector,
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index a6a90026..aaf5fc8b 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -808,8 +808,6 @@ vm_page_t vm_page_grab(unsigned flags)
        else
                selector = VM_PAGE_SEL_DMA;
 
-       simple_lock(&vm_page_queue_free_lock);
-
        /*
         * XXX Mach has many modules that merely assume memory is
         * directly mapped in kernel space. Instead of updating all
@@ -901,8 +899,6 @@ vm_page_t vm_page_grab_contig(
        order = vm_page_order(size);
        nr_pages = 1 << order;
 
-       simple_lock(&vm_page_queue_free_lock);
-
        /* TODO Allow caller to pass type */
        mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
 
-- 
2.47.3


Reply via email to