On 17/11/20(Tue) 13:52, Mark Kettenis wrote:
> > Date: Tue, 17 Nov 2020 09:32:28 -0300
> > From: Martin Pieuchot <[email protected]>
> >
> > On 17/11/20(Tue) 13:23, Mark Kettenis wrote:
> > > > Date: Mon, 16 Nov 2020 10:11:50 -0300
> > > > From: Martin Pieuchot <[email protected]>
> > > >
> > > > On 13/11/20(Fri) 21:05, Mark Kettenis wrote:
> > > > > [...]
> > > > > > Careful reviewers will spot an off-by-one change in the check for
> > > > > > pagedaemon's reserved memory. My understanding is that it's a
> > > > > > bugfix,
> > > > > > is it correct?
> > > > >
> > > > > You mean for uvm_pagealloc(). I'd say yes. But this does mean that
> > > > > in some sense the pagedaemon reserve grows by a page at the cost of
> > > > > the kernel reserve. So I think it would be a good idea to bump the
> > > > > kernel reserve a bit. Maybe to 8 pages?
> > > >
> > > > Fine with me. Could you do it?
> > >
> > > I think it should be done as part of this diff.
> >
> > Fine with me, updated diff below.
>
> Not quite. Need to bump the kernel reserve such that we still have
> enough on top of the pagedaemon reserve because your fix means the
> page daemon may consume an additional page.
Here we go:
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.154
diff -u -p -r1.154 uvm_extern.h
--- uvm/uvm_extern.h 19 Oct 2020 08:19:46 -0000 1.154
+++ uvm/uvm_extern.h 16 Nov 2020 13:09:39 -0000
@@ -142,14 +142,15 @@ typedef int vm_prot_t;
#define UVM_PGA_ZERO 0x0002 /* returned page must be zeroed */
/*
- * flags for uvm_pglistalloc()
+ * flags for uvm_pglistalloc() also used by uvm_pmr_getpages()
*/
#define UVM_PLA_WAITOK 0x0001 /* may sleep */
#define UVM_PLA_NOWAIT 0x0002 /* can't sleep (need one of the two) */
#define UVM_PLA_ZERO 0x0004 /* zero all pages before returning */
#define UVM_PLA_TRYCONTIG 0x0008 /* try to allocate contig physmem */
#define UVM_PLA_FAILOK 0x0010 /* caller can handle failure */
-#define UVM_PLA_NOWAKE 0x0020 /* don't wake the page daemon on failure */
+#define UVM_PLA_NOWAKE 0x0020 /* don't wake page daemon on failure */
+#define UVM_PLA_USERESERVE 0x0040 /* can allocate from kernel reserve */
/*
* lockflags that control the locking behavior of various functions.
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
retrieving revision 1.150
diff -u -p -r1.150 uvm_page.c
--- uvm/uvm_page.c 22 Sep 2020 14:31:08 -0000 1.150
+++ uvm/uvm_page.c 17 Nov 2020 13:13:03 -0000
@@ -277,7 +277,7 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr
* XXXCDC - values may need adjusting
*/
uvmexp.reserve_pagedaemon = 4;
- uvmexp.reserve_kernel = 6;
+ uvmexp.reserve_kernel = 8;
uvmexp.anonminpct = 10;
uvmexp.vnodeminpct = 10;
uvmexp.vtextminpct = 5;
@@ -733,32 +733,11 @@ uvm_pglistalloc(psize_t size, paddr_t lo
size = atop(round_page(size));
/*
- * check to see if we need to generate some free pages waking
- * the pagedaemon.
- */
- if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
- ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
- (uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg))
- wakeup(&uvm.pagedaemon);
-
- /*
* XXX uvm_pglistalloc is currently only used for kernel
* objects. Unlike the checks in uvm_pagealloc, below, here
- * we are always allowed to use the kernel reserve. However, we
- * have to enforce the pagedaemon reserve here or allocations
- * via this path could consume everything and we can't
- * recover in the page daemon.
+ * we are always allowed to use the kernel reserve.
*/
- again:
- if ((uvmexp.free <= uvmexp.reserve_pagedaemon + size &&
- !((curproc == uvm.pagedaemon_proc) ||
- (curproc == syncerproc)))) {
- if (flags & UVM_PLA_WAITOK) {
- uvm_wait("uvm_pglistalloc");
- goto again;
- }
- return (ENOMEM);
- }
+ flags |= UVM_PLA_USERESERVE;
if ((high & PAGE_MASK) != PAGE_MASK) {
printf("uvm_pglistalloc: Upper boundary 0x%lx "
@@ -871,7 +850,7 @@ uvm_pagerealloc_multi(struct uvm_object
}
/*
- * uvm_pagealloc_strat: allocate vm_page from a particular free list.
+ * uvm_pagealloc: allocate vm_page from a particular free list.
*
* => return null if no pages free
* => wake up pagedaemon if number of free pages drops below low water mark
@@ -886,37 +865,21 @@ uvm_pagealloc(struct uvm_object *obj, vo
struct vm_page *pg;
struct pglist pgl;
int pmr_flags;
- boolean_t use_reserve;
KASSERT(obj == NULL || anon == NULL);
+ KASSERT(anon == NULL || off == 0);
KASSERT(off == trunc_page(off));
- /*
- * check to see if we need to generate some free pages waking
- * the pagedaemon.
- */
- if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
- ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
- (uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg))
- wakeup(&uvm.pagedaemon);
+ pmr_flags = UVM_PLA_NOWAIT;
/*
- * fail if any of these conditions is true:
- * [1] there really are no free pages, or
- * [2] only kernel "reserved" pages remain and
- * the page isn't being allocated to a kernel object.
- * [3] only pagedaemon "reserved" pages remain and
- * the requestor isn't the pagedaemon.
+ * We're allowed to use the kernel reserve if the page is
+ * being allocated to a kernel object.
*/
- use_reserve = (flags & UVM_PGA_USERESERVE) ||
- (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
- if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
- (uvmexp.free <= uvmexp.reserve_pagedaemon &&
- !((curproc == uvm.pagedaemon_proc) ||
- (curproc == syncerproc))))
- goto fail;
+ if ((flags & UVM_PGA_USERESERVE) ||
+ (obj != NULL && UVM_OBJ_IS_KERN_OBJECT(obj)))
+ pmr_flags |= UVM_PLA_USERESERVE;
- pmr_flags = UVM_PLA_NOWAIT;
if (flags & UVM_PGA_ZERO)
pmr_flags |= UVM_PLA_ZERO;
TAILQ_INIT(&pgl);
Index: uvm/uvm_pmemrange.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pmemrange.c,v
retrieving revision 1.59
diff -u -p -r1.59 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c 18 Feb 2020 12:13:40 -0000 1.59
+++ uvm/uvm_pmemrange.c 16 Nov 2020 13:03:51 -0000
@@ -869,6 +869,7 @@ uvm_pmr_getpages(psize_t count, paddr_t
KASSERT(boundary == 0 || powerof2(boundary));
KASSERT(boundary == 0 || maxseg * boundary >= count);
KASSERT(TAILQ_EMPTY(result));
+ KASSERT(!(flags & UVM_PLA_WAITOK) ^ !(flags & UVM_PLA_NOWAIT));
/*
* TRYCONTIG is a noop if you only want a single segment.
@@ -938,7 +939,41 @@ uvm_pmr_getpages(psize_t count, paddr_t
*/
desperate = 0;
+again:
uvm_lock_fpageq();
+
+ /*
+ * check to see if we need to generate some free pages waking
+ * the pagedaemon.
+ */
+ if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin ||
+ ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg &&
+ (uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg))
+ wakeup(&uvm.pagedaemon);
+
+ /*
+ * fail if any of these conditions is true:
+ * [1] there really are no free pages, or
+ * [2] only kernel "reserved" pages remain and
+ * the UVM_PLA_USERESERVE flag wasn't used.
+ * [3] only pagedaemon "reserved" pages remain and
+ * the requestor isn't the pagedaemon nor the syncer.
+ */
+ if ((uvmexp.free <= uvmexp.reserve_kernel) &&
+ !(flags & UVM_PLA_USERESERVE)) {
+ uvm_unlock_fpageq();
+ return ENOMEM;
+ }
+
+ if ((uvmexp.free <= (uvmexp.reserve_pagedaemon + count)) &&
+ (curproc != uvm.pagedaemon_proc) && (curproc != syncerproc)) {
+ uvm_unlock_fpageq();
+ if (flags & UVM_PLA_WAITOK) {
+ uvm_wait("uvm_pmr_getpages");
+ goto again;
+ }
+ return ENOMEM;
+ }
retry: /* Return point after sleeping. */
fcount = 0;
Index: uvm/uvmexp.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvmexp.h,v
retrieving revision 1.5
diff -u -p -r1.5 uvmexp.h
--- uvm/uvmexp.h 23 Apr 2020 07:57:27 -0000 1.5
+++ uvm/uvmexp.h 13 Nov 2020 13:51:45 -0000
@@ -39,6 +39,10 @@
/*
* uvmexp: global data structures that are exported to parts of the kernel
* other than the vm system.
+ *
+ * Locks used to protect struct members in this file:
+ * I immutable after creation
+ * F uvm_lock_fpageq
*/
struct uvmexp {
/* vm_page constants */
@@ -47,16 +51,16 @@ struct uvmexp {
int pageshift; /* page shift */
/* vm_page counters */
- int npages; /* number of pages we manage */
- int free; /* number of free pages */
+ int npages; /* [I] number of pages we manage */
+ int free; /* [F] number of free pages */
int active; /* number of active pages */
int inactive; /* number of pages that we free'd but may want back */
int paging; /* number of pages in the process of being paged out */
int wired; /* number of wired pages */
- int zeropages; /* number of zero'd pages */
- int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
- int reserve_kernel; /* number of pages reserved for kernel */
+ int zeropages; /* [F] number of zero'd pages */
+ int reserve_pagedaemon; /* [I] # of pages reserved for pagedaemon */
+ int reserve_kernel; /* [I] # of pages reserved for kernel */
int unused01; /* formerly anonpages */
int vnodepages; /* XXX # of pages used by vnode page cache */
int vtextpages; /* XXX # of pages used by vtext vnodes */