> Date: Fri, 26 Sep 2014 21:01:38 +0200 (CEST) > From: Mark Kettenis <mark.kette...@xs4all.nl> > > The diff below adds a kernel thread that makes memory pages filled > with zeroes without holding the kernel lock. The idea is that this > should speed up MP systems because the kernel can do some useful work > in parallel with other things, and could lower the latency on all > systems because (userland) memory page allocation will be faster. The > thread runs at the absolutely lowest priority such that we only run it > if we don't have anything else to do. > > But this could also slow down some systems, because zeroing > pages can thrash the caches in your system. > > So I'd like to see this diff tested on a wide variety of systems, and > hear back from people how this diff affects their OpenBSD systems. > Subjective reports are ok; hard benchmarks are better. > > One thing to look at when you're running this diff is the output of > "vmstat -s". It will report something like: > > 2146380 zeroed page hits > 34258 zeroed page misses > > You want the number of hits to be significantly larger than the number > of misses. And you want the number of hits to keep growing. Another > thing to look at is "systat vm". If you make your screen a bit bigger > than 25 lines, you'll see a "pzidle" counter which indicates how many > zeroed pages have been consumed (negative) or created (positive). > > Thanks, > > Mark
Forgot to include a file in the diff. Here is a complete one: Index: kern/init_main.c =================================================================== RCS file: /home/cvs/src/sys/kern/init_main.c,v retrieving revision 1.217 diff -u -p -r1.217 init_main.c --- kern/init_main.c 14 Aug 2014 09:01:47 -0000 1.217 +++ kern/init_main.c 2 Sep 2014 19:02:37 -0000 @@ -526,6 +526,10 @@ main(void *framep) if (kthread_create(uvm_aiodone_daemon, NULL, NULL, "aiodoned")) panic("fork aiodoned"); + /* Create the page zeroing kernel thread. */ + if (kthread_create(uvm_pagezero_thread, NULL, NULL, "zerothread")) + panic("fork zerothread"); + #if defined(MULTIPROCESSOR) /* Boot the secondary processors. */ cpu_boot_secondary_processors(); Index: uvm/uvm_extern.h =================================================================== RCS file: /home/cvs/src/sys/uvm/uvm_extern.h,v retrieving revision 1.119 diff -u -p -r1.119 uvm_extern.h --- uvm/uvm_extern.h 11 Jul 2014 16:35:40 -0000 1.119 +++ uvm/uvm_extern.h 12 Jul 2014 19:02:23 -0000 @@ -519,6 +519,7 @@ void uvm_vnp_sync(struct mount *); void uvm_vnp_terminate(struct vnode *); boolean_t uvm_vnp_uncache(struct vnode *); struct uvm_object *uvn_attach(struct vnode *, vm_prot_t); +void uvm_pagezero_thread(void *); void kmeminit_nkmempages(void); void kmeminit(void); extern u_int nkmempages; Index: uvm/uvm_page.h =================================================================== RCS file: /home/cvs/src/sys/uvm/uvm_page.h,v retrieving revision 1.54 diff -u -p -r1.54 uvm_page.h --- uvm/uvm_page.h 11 Jul 2014 16:35:40 -0000 1.54 +++ uvm/uvm_page.h 12 Jul 2014 19:02:23 -0000 @@ -296,7 +296,7 @@ int vm_physseg_find(paddr_t, int *); #define uvm_lock_fpageq() mtx_enter(&uvm.fpageqlock); #define uvm_unlock_fpageq() mtx_leave(&uvm.fpageqlock); -#define UVM_PAGEZERO_TARGET (uvmexp.free) +#define UVM_PAGEZERO_TARGET (uvmexp.free / 8) #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) Index: uvm/uvm_pmemrange.c 
=================================================================== RCS file: /home/cvs/src/sys/uvm/uvm_pmemrange.c,v retrieving revision 1.41 diff -u -p -r1.41 uvm_pmemrange.c --- uvm/uvm_pmemrange.c 14 Sep 2014 14:17:27 -0000 1.41 +++ uvm/uvm_pmemrange.c 24 Sep 2014 15:29:00 -0000 @@ -21,6 +21,7 @@ #include <uvm/uvm.h> #include <sys/malloc.h> #include <sys/kernel.h> +#include <sys/kthread.h> #include <sys/mount.h> /* @@ -107,7 +108,7 @@ void uvm_pmr_assertvalid(struct uvm_pmem #endif int uvm_pmr_get1page(psize_t, int, struct pglist *, - paddr_t, paddr_t); + paddr_t, paddr_t, int); struct uvm_pmemrange *uvm_pmr_allocpmr(void); struct vm_page *uvm_pmr_nfindsz(struct uvm_pmemrange *, psize_t, int); @@ -824,7 +825,7 @@ retry_desperate: if (count <= maxseg && align == 1 && boundary == 0 && (flags & UVM_PLA_TRYCONTIG) == 0) { fcount += uvm_pmr_get1page(count - fcount, memtype_init, - result, start, end); + result, start, end, 0); /* * If we found sufficient pages, go to the succes exit code. 
@@ -1036,6 +1037,8 @@ out: if (found->pg_flags & PG_ZERO) { uvmexp.zeropages--; + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + wakeup(&uvmexp.zeropages); } if (flags & UVM_PLA_ZERO) { if (found->pg_flags & PG_ZERO) @@ -1130,6 +1133,8 @@ uvm_pmr_freepages(struct vm_page *pg, ps pg += pmr_count; } wakeup(&uvmexp.free); + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + wakeup(&uvmexp.zeropages); uvm_wakeup_pla(VM_PAGE_TO_PHYS(firstpg), ptoa(count)); @@ -1167,6 +1172,8 @@ uvm_pmr_freepageq(struct pglist *pgl) uvm_wakeup_pla(pstart, ptoa(plen)); } wakeup(&uvmexp.free); + if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) + wakeup(&uvmexp.zeropages); uvm_unlock_fpageq(); return; @@ -1663,7 +1670,7 @@ uvm_pmr_rootupdate(struct uvm_pmemrange */ int uvm_pmr_get1page(psize_t count, int memtype_init, struct pglist *result, - paddr_t start, paddr_t end) + paddr_t start, paddr_t end, int memtype_only) { struct uvm_pmemrange *pmr; struct vm_page *found, *splitpg; @@ -1779,6 +1786,8 @@ uvm_pmr_get1page(psize_t count, int memt uvm_pmr_remove_addr(pmr, found); uvm_pmr_assertvalid(pmr); } else { + if (memtype_only) + break; /* * Skip to the next memtype. */ @@ -1941,5 +1950,42 @@ uvm_wakeup_pla(paddr_t low, psize_t len) wakeup(pma); } } + } +} + +void +uvm_pagezero_thread(void *arg) +{ + struct pglist pgl; + struct vm_page *pg; + int count; + + /* Run at the lowest possible priority. */ + curproc->p_p->ps_nice = NZERO + PRIO_MAX; + + KERNEL_UNLOCK(); + + for (;;) { + uvm_lock_fpageq(); + while (uvmexp.zeropages >= UVM_PAGEZERO_TARGET || + (count = uvm_pmr_get1page(16, UVM_PMR_MEMTYPE_DIRTY, + &pgl, 0, 0, 1)) == 0) { + msleep(&uvmexp.zeropages, &uvm.fpageqlock, MAXPRI, + "pgzero", 0); + } + uvm_unlock_fpageq(); + + TAILQ_FOREACH(pg, &pgl, pageq) { + uvm_pagezero(pg); + atomic_setbits_int(&pg->pg_flags, PG_ZERO); + } + + uvm_lock_fpageq(); + while (!TAILQ_EMPTY(&pgl)) + uvm_pmr_remove_1strange(&pgl, 0, NULL, 0); + uvmexp.zeropages += count; + uvm_unlock_fpageq(); + + yield(); } }