During a page fault multiple counters are updated. They fall into two
categories "fault counters" and "global statistics" both of which are
currently represented by int-sized fields inside a global: `uvmexp'.
Diff below makes use of the per-CPU counters_inc(9) API to make sure no
update is lost with an unlocked fault handler. I only converted the
fields touched by uvm_fault() to have a working solution and start a
discussion.
- Should we keep a single enum for all fields inside `uvmexp' or do we
want to separate "statistics counters" which are mostly used by sys/arch
from "fault counters" which are only used in uvm/uvm_fault.c?
- The counters_add(9) API deals with uint64_t and currently uvmexp uses
int. Should we truncate or change the size of the uvmexp fields, or do
something else?
Comments?
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.302
diff -u -p -r1.302 init_main.c
--- kern/init_main.c 7 Dec 2020 16:55:28 -0000 1.302
+++ kern/init_main.c 21 Dec 2020 19:37:13 -0000
@@ -432,6 +432,7 @@ main(void *framep)
#endif
mbcpuinit(); /* enable per cpu mbuf data */
+ uvm_init_percpu();
/* init exec and emul */
init_exec();
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.155
diff -u -p -r1.155 uvm_extern.h
--- uvm/uvm_extern.h 1 Dec 2020 13:56:22 -0000 1.155
+++ uvm/uvm_extern.h 21 Dec 2020 19:37:13 -0000
@@ -289,6 +289,7 @@ void uvm_vsunlock_device(struct proc
*
void *);
void uvm_pause(void);
void uvm_init(void);
+void uvm_init_percpu(void);
int uvm_io(vm_map_t, struct uio *, int);
#define UVM_IO_FIXPROT 0x01
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.109
diff -u -p -r1.109 uvm_fault.c
--- uvm/uvm_fault.c 8 Dec 2020 12:26:31 -0000 1.109
+++ uvm/uvm_fault.c 21 Dec 2020 19:37:13 -0000
@@ -35,6 +35,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
@@ -271,7 +272,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
int result;
result = 0; /* XXX shut up gcc */
- uvmexp.fltanget++;
+ counters_inc(uvmexp_counters, flt_anget);
/* bump rusage counters */
if (anon->an_page)
curproc->p_ru.ru_minflt++;
@@ -295,7 +296,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
return (VM_PAGER_OK);
atomic_setbits_int(&pg->pg_flags, PG_WANTED);
- uvmexp.fltpgwait++;
+ counters_inc(uvmexp_counters, flt_pgwait);
/*
* the last unlock must be an atomic unlock+wait on
@@ -310,7 +311,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
if (pg == NULL) { /* out of RAM. */
uvmfault_unlockall(ufi, amap, NULL);
- uvmexp.fltnoram++;
+ counters_inc(uvmexp_counters, flt_noram);
uvm_wait("flt_noram1");
/* ready to relock and try again */
} else {
@@ -325,7 +326,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
* it is ok to read an_swslot here because
* we hold PG_BUSY on the page.
*/
- uvmexp.pageins++;
+ counters_inc(uvmexp_counters, pageins);
result = uvm_swap_get(pg, anon->an_swslot,
PGO_SYNCIO);
@@ -369,7 +370,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
uvm_anfree(anon); /* frees page for us */
if (locked)
uvmfault_unlockall(ufi, amap, NULL);
- uvmexp.fltpgrele++;
+ counters_inc(uvmexp_counters, flt_pgrele);
return (VM_PAGER_REFAULT); /* refault! */
}
@@ -426,7 +427,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
}
/* try it again! */
- uvmexp.fltanretry++;
+ counters_inc(uvmexp_counters, flt_anretry);
continue;
} /* while (1) */
@@ -547,7 +548,7 @@ uvm_fault_check(struct uvm_faultinfo *uf
/* need to clear */
uvmfault_unlockmaps(ufi, FALSE);
uvmfault_amapcopy(ufi);
- uvmexp.fltamcopy++;
+ counters_inc(uvmexp_counters, flt_amcopy);
return (ERESTART);
} else {
/*
@@ -699,7 +700,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
*/
if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
- uvmexp.flt_acow++;
+ counters_inc(uvmexp_counters, flt_acow);
oanon = anon; /* oanon = old */
anon = uvm_analloc();
if (anon) {
@@ -710,10 +711,10 @@ uvm_fault_upper(struct uvm_faultinfo *uf
if (anon == NULL || pg == NULL) {
uvmfault_unlockall(ufi, amap, NULL);
if (anon == NULL)
- uvmexp.fltnoanon++;
+ counters_inc(uvmexp_counters, flt_noanon);
else {
uvm_anfree(anon);
- uvmexp.fltnoram++;
+ counters_inc(uvmexp_counters, flt_noram);
}
if (uvm_swapisfull())
@@ -745,7 +746,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
* thus, no one can get at it until we are done with it.
*/
} else {
- uvmexp.flt_anon++;
+ counters_inc(uvmexp_counters, flt_anon);
oanon = anon;
pg = anon->an_page;
if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */
@@ -861,7 +862,7 @@ uvm_fault_upper_lookup(struct uvm_faulti
uvm_lock_pageq();
uvm_pageactivate(anon->an_page); /* reactivate */
uvm_unlock_pageq();
- uvmexp.fltnamap++;
+ counters_inc(uvmexp_counters, flt_namap);
/*
* Since this isn't the page that's actually faulting,
@@ -909,7 +910,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
struct vm_page *pages[UVM_MAXRANGE];
int error = ERESTART;
- uvmexp.faults++; /* XXX: locking? */
+ counters_inc(uvmexp_counters, faults);
TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
/* init the IN parameters in the ufi */
@@ -994,7 +995,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
* ("get" has the option of doing a pmap_enter for us)
*/
if (uobj != NULL) {
- uvmexp.fltlget++;
+ counters_inc(uvmexp_counters, flt_lget);
gotpages = flt->npages;
(void) uobj->pgops->pgo_get(uobj, ufi->entry->offset +
(flt->startva - ufi->entry->start),
@@ -1038,7 +1039,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
uvm_lock_pageq();
uvm_pageactivate(pages[lcv]); /* reactivate */
uvm_unlock_pageq();
- uvmexp.fltnomap++;
+ counters_inc(uvmexp_counters, flt_nomap);
/*
* Since this page isn't the page that's
@@ -1109,7 +1110,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
uvmfault_unlockall(ufi, amap, NULL);
- uvmexp.fltget++;
+ counters_inc(uvmexp_counters, flt_get);
gotpages = 1;
uoff = (ufi->orig_rvaddr - ufi->entry->start) +
ufi->entry->offset;
result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
@@ -1183,7 +1184,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
*
* set "pg" to the page we want to map in (uobjpage, usually)
*/
- uvmexp.flt_obj++;
+ counters_inc(uvmexp_counters, flt_obj);
if (UVM_ET_ISCOPYONWRITE(ufi->entry))
flt->enter_prot &= ~PROT_WRITE;
pg = uobjpage; /* map in the actual object */
@@ -1235,10 +1236,10 @@ uvm_fault_lower(struct uvm_faultinfo *uf
/* unlock and fail ... */
uvmfault_unlockall(ufi, amap, uobj);
if (anon == NULL)
- uvmexp.fltnoanon++;
+ counters_inc(uvmexp_counters, flt_noanon);
else {
uvm_anfree(anon);
- uvmexp.fltnoram++;
+ counters_inc(uvmexp_counters, flt_noram);
}
if (uvm_swapisfull())
@@ -1254,7 +1255,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
/* fill in the data */
if (uobjpage != PGO_DONTCARE) {
- uvmexp.flt_prcopy++;
+ counters_inc(uvmexp_counters, flt_prcopy);
/* copy page [pg now dirty] */
uvm_pagecopy(uobjpage, pg);
@@ -1277,7 +1278,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
uvm_unlock_pageq();
uobj = NULL;
} else {
- uvmexp.flt_przero++;
+ counters_inc(uvmexp_counters, flt_przero);
/*
* Page is zero'd and marked dirty by uvm_pagealloc()
* above.
@@ -1288,7 +1289,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
uvmfault_unlockall(ufi, amap, NULL);
uvm_anfree(anon);
- uvmexp.fltnoamap++;
+ counters_inc(uvmexp_counters, flt_noamap);
if (uvm_swapisfull())
return (ENOMEM);
@@ -1580,7 +1581,7 @@ uvmfault_relock(struct uvm_faultinfo *uf
return TRUE;
}
- uvmexp.fltrelck++;
+ counters_inc(uvmexp_counters, flt_relck);
/*
* relock map. fail if version mismatch (in which case nothing
@@ -1592,6 +1593,6 @@ uvmfault_relock(struct uvm_faultinfo *uf
return(FALSE);
}
- uvmexp.fltrelckok++;
+ counters_inc(uvmexp_counters, flt_relckok);
return(TRUE); /* got it! */
}
Index: uvm/uvm_init.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_init.c,v
retrieving revision 1.40
diff -u -p -r1.40 uvm_init.c
--- uvm/uvm_init.c 11 May 2017 00:42:05 -0000 1.40
+++ uvm/uvm_init.c 21 Dec 2020 19:37:13 -0000
@@ -35,6 +35,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
+#include <sys/percpu.h>
#include <sys/resourcevar.h>
#include <sys/mman.h>
#include <sys/malloc.h>
@@ -52,6 +53,9 @@
struct uvm uvm; /* decl */
struct uvmexp uvmexp; /* decl */
+COUNTERS_BOOT_MEMORY(uvmexp_countersboot, exp_ncounters);
+struct cpumem *uvmexp_counters =
COUNTERS_BOOT_INITIALIZER(uvmexp_countersboot);
+
#if defined(VM_MIN_KERNEL_ADDRESS)
vaddr_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS;
#else
@@ -184,4 +188,10 @@ uvm_init(void)
uaddr_bestfit_create(vm_map_min(kmem_map),
vm_map_max(kmem_map)));
#endif /* !SMALL_KERNEL */
+}
+
+void
+uvm_init_percpu(void)
+{
+ uvmexp_counters = counters_alloc_ncpus(uvmexp_counters, exp_ncounters);
}
Index: uvm/uvm_meter.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.41
diff -u -p -r1.41 uvm_meter.c
--- uvm/uvm_meter.c 24 Jun 2020 22:03:45 -0000 1.41
+++ uvm/uvm_meter.c 21 Dec 2020 19:37:13 -0000
@@ -39,6 +39,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
@@ -178,9 +179,42 @@ uvm_sysctl(int *name, u_int namelen, voi
return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
sizeof(vmtotals)));
- case VM_UVMEXP:
- return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp,
- sizeof(uvmexp)));
+ case VM_UVMEXP: {
+ struct uvmexp uexp;
+ uint64_t counters[exp_ncounters];
+
+ memcpy(&uexp, &uvmexp, sizeof(uexp));
+
+ counters_read(uvmexp_counters, counters, exp_ncounters);
+
+ /* stat counters */
+ uexp.faults = (int)counters[faults];
+ uexp.pageins = (int)counters[pageins];
+
+ /* fault subcounters */
+ uexp.fltnoram = (int)counters[flt_noram];
+ uexp.fltnoanon = (int)counters[flt_noanon];
+ uexp.fltnoamap = (int)counters[flt_noamap];
+ uexp.fltpgwait = (int)counters[flt_pgwait];
+ uexp.fltpgrele = (int)counters[flt_pgrele];
+ uexp.fltrelck = (int)counters[flt_relck];
+ uexp.fltrelckok = (int)counters[flt_relckok];
+ uexp.fltanget = (int)counters[flt_anget];
+ uexp.fltanretry = (int)counters[flt_anretry];
+ uexp.fltamcopy = (int)counters[flt_amcopy];
+ uexp.fltnamap = (int)counters[flt_namap];
+ uexp.fltnomap = (int)counters[flt_nomap];
+ uexp.fltlget = (int)counters[flt_lget];
+ uexp.fltget = (int)counters[flt_get];
+ uexp.flt_anon = (int)counters[flt_anon];
+ uexp.flt_acow = (int)counters[flt_acow];
+ uexp.flt_obj = (int)counters[flt_obj];
+ uexp.flt_prcopy = (int)counters[flt_prcopy];
+ uexp.flt_przero = (int)counters[flt_przero];
+
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &uexp,
+ sizeof(uexp)));
+ }
case VM_NKMEMPAGES:
return (sysctl_rdint(oldp, oldlenp, newp, nkmempages));
Index: uvm/uvmexp.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvmexp.h,v
retrieving revision 1.7
diff -u -p -r1.7 uvmexp.h
--- uvm/uvmexp.h 14 Dec 2020 13:29:18 -0000 1.7
+++ uvm/uvmexp.h 21 Dec 2020 19:37:13 -0000
@@ -156,4 +156,41 @@ struct _ps_strings {
void *val;
};
+#ifdef _KERNEL
+
+/*
+ * Per-cpu UVM counters.
+ */
+extern struct cpumem *uvmexp_counters;
+
+enum uvm_exp_counters {
+ /* stat counters */
+ faults, /* page fault count */
+ pageins, /* pagein operation count */
+
+ /* fault subcounters */
+ flt_noram, /* number of times fault was out of ram */
+ flt_noanon, /* number of times fault was out of anons */
+ flt_noamap, /* number of times fault was out of amap chunks */
+ flt_pgwait, /* number of times fault had to wait on a page */
+ flt_pgrele, /* number of times fault found a released page */
+ flt_relck, /* number of times fault relock called */
+ flt_relckok, /* number of times fault relock is a success */
+ flt_anget, /* number of times fault gets anon page */
+ flt_anretry, /* number of times fault retrys an anon get */
+ flt_amcopy, /* number of times fault clears "needs copy" */
+ flt_namap, /* number of times fault maps a neighbor anon page */
+ flt_nomap, /* number of times fault maps a neighbor obj page */
+ flt_lget, /* number of times fault does a locked pgo_get */
+ flt_get, /* number of times fault does an unlocked get */
+ flt_anon, /* number of times fault anon (case 1a) */
+ flt_acow, /* number of times fault anon cow (case 1b) */
+ flt_obj, /* number of times fault is on object page (2a) */
+ flt_prcopy, /* number of times fault promotes with copy (2b) */
+ flt_przero, /* number of times fault promotes with zerofill (2b) */
+
+ exp_ncounters
+};
+
+#endif /* _KERNEL */
#endif /*_UVM_UVMEXP_ */