> Date: Fri, 26 Sep 2014 21:01:38 +0200 (CEST)
> From: Mark Kettenis <[email protected]>
>
> The diff below adds a kernel thread that makes memory pages filled
> with zeroes without holding the kernel lock. The idea is that this
> should speed up MP systems because the kernel can do some useful work
> in parallel with other things, and could lower the latency on all
> systems because (userland) memory page allocation will be faster. The
> thread runs at the absolutely lowest priority such that we only run it
> if we don't have anything else to do.
>
> This could also slow down some systems, though, because zeroing
> pages can thrash the caches in your system.
>
> So I'd like to see this diff tested on a wide variety of systems, and
> hear back from people how this diff affects their OpenBSD systems.
> Subjective reports are ok; hard benchmarks are better.
>
> One thing to look at when you're running this diff is the output of
> "vmstat -s". It will report something like:
>
> 2146380 zeroed page hits
> 34258 zeroed page misses
>
> You want the number of hits to be significantly larger than the number
> of misses. And you want the number of hits to keep growing. Another
> thing to look at is "systat vm". If you make your screen a bit bigger
> than 25 lines, you'll see a "pzidle" counter which indicates how many
> zeroed pages have been consumed (negative) or created (positive).
>
> Thanks,
>
> Mark
Forgot to include a file in the diff. Here is a complete one:
Index: kern/init_main.c
===================================================================
RCS file: /home/cvs/src/sys/kern/init_main.c,v
retrieving revision 1.217
diff -u -p -r1.217 init_main.c
--- kern/init_main.c 14 Aug 2014 09:01:47 -0000 1.217
+++ kern/init_main.c 2 Sep 2014 19:02:37 -0000
@@ -526,6 +526,10 @@ main(void *framep)
if (kthread_create(uvm_aiodone_daemon, NULL, NULL, "aiodoned"))
panic("fork aiodoned");
+ /* Create the page zeroing kernel thread. */
+ if (kthread_create(uvm_pagezero_thread, NULL, NULL, "zerothread"))
+ panic("fork zerothread");
+
#if defined(MULTIPROCESSOR)
/* Boot the secondary processors. */
cpu_boot_secondary_processors();
Index: uvm/uvm_extern.h
===================================================================
RCS file: /home/cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.119
diff -u -p -r1.119 uvm_extern.h
--- uvm/uvm_extern.h 11 Jul 2014 16:35:40 -0000 1.119
+++ uvm/uvm_extern.h 12 Jul 2014 19:02:23 -0000
@@ -519,6 +519,7 @@ void uvm_vnp_sync(struct mount *);
void uvm_vnp_terminate(struct vnode *);
boolean_t uvm_vnp_uncache(struct vnode *);
struct uvm_object *uvn_attach(struct vnode *, vm_prot_t);
+void uvm_pagezero_thread(void *);
void kmeminit_nkmempages(void);
void kmeminit(void);
extern u_int nkmempages;
Index: uvm/uvm_page.h
===================================================================
RCS file: /home/cvs/src/sys/uvm/uvm_page.h,v
retrieving revision 1.54
diff -u -p -r1.54 uvm_page.h
--- uvm/uvm_page.h 11 Jul 2014 16:35:40 -0000 1.54
+++ uvm/uvm_page.h 12 Jul 2014 19:02:23 -0000
@@ -296,7 +296,7 @@ int vm_physseg_find(paddr_t, int *);
#define uvm_lock_fpageq() mtx_enter(&uvm.fpageqlock);
#define uvm_unlock_fpageq() mtx_leave(&uvm.fpageqlock);
-#define UVM_PAGEZERO_TARGET (uvmexp.free)
+#define UVM_PAGEZERO_TARGET (uvmexp.free / 8)
#define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr)
Index: uvm/uvm_pmemrange.c
===================================================================
RCS file: /home/cvs/src/sys/uvm/uvm_pmemrange.c,v
retrieving revision 1.41
diff -u -p -r1.41 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c 14 Sep 2014 14:17:27 -0000 1.41
+++ uvm/uvm_pmemrange.c 24 Sep 2014 15:29:00 -0000
@@ -21,6 +21,7 @@
#include <uvm/uvm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/mount.h>
/*
@@ -107,7 +108,7 @@ void uvm_pmr_assertvalid(struct uvm_pmem
#endif
int uvm_pmr_get1page(psize_t, int, struct pglist *,
- paddr_t, paddr_t);
+ paddr_t, paddr_t, int);
struct uvm_pmemrange *uvm_pmr_allocpmr(void);
struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int);
@@ -824,7 +825,7 @@ retry_desperate:
if (count <= maxseg && align == 1 && boundary == 0 &&
(flags & UVM_PLA_TRYCONTIG) == 0) {
fcount += uvm_pmr_get1page(count - fcount, memtype_init,
- result, start, end);
+ result, start, end, 0);
/*
* If we found sufficient pages, go to the succes exit code.
@@ -1036,6 +1037,8 @@ out:
if (found->pg_flags & PG_ZERO) {
uvmexp.zeropages--;
+ if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+ wakeup(&uvmexp.zeropages);
}
if (flags & UVM_PLA_ZERO) {
if (found->pg_flags & PG_ZERO)
@@ -1130,6 +1133,8 @@ uvm_pmr_freepages(struct vm_page *pg, ps
pg += pmr_count;
}
wakeup(&uvmexp.free);
+ if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+ wakeup(&uvmexp.zeropages);
uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count));
@@ -1167,6 +1172,8 @@ uvm_pmr_freepageq(struct pglist *pgl)
uvm_wakeup_pla(pstart, ptoa(plen));
}
wakeup(&uvmexp.free);
+ if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
+ wakeup(&uvmexp.zeropages);
uvm_unlock_fpageq();
return;
@@ -1663,7 +1670,7 @@ uvm_pmr_rootupdate(struct uvm_pmemrange
*/
int
uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result,
- paddr_t start, paddr_t end)
+ paddr_t start, paddr_t end, int memtype_only)
{
struct uvm_pmemrange *pmr;
struct vm_page *found, *splitpg;
@@ -1779,6 +1786,8 @@ uvm_pmr_get1page(psize_t count, int memt
uvm_pmr_remove_addr(pmr, found);
uvm_pmr_assertvalid(pmr);
} else {
+ if (memtype_only)
+ break;
/*
* Skip to the next memtype.
*/
@@ -1941,5 +1950,57 @@ uvm_wakeup_pla(paddr_t low, psize_t len)
wakeup(pma);
}
}
+ }
+}
+
+/*
+ * Page zeroing kernel thread.
+ *
+ * Sleeps on &uvmexp.zeropages while the number of zeroed pages is at or
+ * above UVM_PAGEZERO_TARGET, or while uvm_pmr_get1page() yields no dirty
+ * pages. Otherwise it takes up to 16 dirty pages, zeroes them with the
+ * free page queue lock released, flags them PG_ZERO and returns them via
+ * uvm_pmr_remove_1strange(). The argument is unused.
+ */
+void
+uvm_pagezero_thread(void *arg)
+{
+ struct pglist pgl;
+ struct vm_page *pg;
+ int count;
+
+ /* Run at the lowest possible priority. */
+ curproc->p_p->ps_nice = NZERO + PRIO_MAX;
+
+ KERNEL_UNLOCK();
+
+ /*
+ * The list head lives on our stack and uvm_pmr_get1page() adds to
+ * the list it is handed, so initialize it before the first call.
+ * The hand-back loop below leaves it empty again for the next pass.
+ */
+ TAILQ_INIT(&pgl);
+ for (;;) {
+ uvm_lock_fpageq();
+ while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET ||
+ (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY,
+ &pgl, 0, 0, 1)) == 0) {
+ msleep(&uvmexp.zeropages, &uvm.fpageqlock, MAXPRI,
+ "pgzero", 0);
+ }
+ uvm_unlock_fpageq();
+
+ /* Zero the pages without holding the free page queue lock. */
+ TAILQ_FOREACH(pg, &pgl, pageq) {
+ uvm_pagezero(pg);
+ atomic_setbits_int(&pg->pg_flags, PG_ZERO);
+ }
+
+ uvm_lock_fpageq();
+ while (!TAILQ_EMPTY(&pgl))
+ uvm_pmr_remove_1strange(&pgl, 0, NULL, 0);
+ uvmexp.zeropages += count;
+ uvm_unlock_fpageq();
+
+ yield();
}
}