During a page fault multiple counters are updated.  They fall into two
categories, "fault counters" and "global statistics", both of which are
currently represented by int-sized fields inside a global: `uvmexp'.

Diff below makes use of the per-CPU counters_inc(9) API to make sure no
update is lost with an unlocked fault handler.  I only converted the
fields touched by uvm_fault() to have a working solution and start a
discussion.

- Should we keep a single enum for all fields inside `uvmexp' or do we
  want to separate "statistics counters", which are mostly used in
  sys/arch, from "fault counters", which are only used in uvm/uvm_fault.c?

- The counters_add(9) API deals with uint64_t and currently uvmexp uses
  int.  Should we truncate or change the size of the uvmexp fields, or do
  something else?

Comments?

Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.302
diff -u -p -r1.302 init_main.c
--- kern/init_main.c    7 Dec 2020 16:55:28 -0000       1.302
+++ kern/init_main.c    21 Dec 2020 19:37:13 -0000
@@ -432,6 +432,7 @@ main(void *framep)
 #endif
 
        mbcpuinit();    /* enable per cpu mbuf data */
+       uvm_init_percpu();
 
        /* init exec and emul */
        init_exec();
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.155
diff -u -p -r1.155 uvm_extern.h
--- uvm/uvm_extern.h    1 Dec 2020 13:56:22 -0000       1.155
+++ uvm/uvm_extern.h    21 Dec 2020 19:37:13 -0000
@@ -289,6 +289,7 @@ void                        uvm_vsunlock_device(struct proc 
*
                            void *);
 void                   uvm_pause(void);
 void                   uvm_init(void); 
+void                   uvm_init_percpu(void);
 int                    uvm_io(vm_map_t, struct uio *, int);
 
 #define        UVM_IO_FIXPROT  0x01
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.109
diff -u -p -r1.109 uvm_fault.c
--- uvm/uvm_fault.c     8 Dec 2020 12:26:31 -0000       1.109
+++ uvm/uvm_fault.c     21 Dec 2020 19:37:13 -0000
@@ -35,6 +35,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/percpu.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
@@ -271,7 +272,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
        int result;
 
        result = 0;             /* XXX shut up gcc */
-       uvmexp.fltanget++;
+       counters_inc(uvmexp_counters, flt_anget);
         /* bump rusage counters */
        if (anon->an_page)
                curproc->p_ru.ru_minflt++;
@@ -295,7 +296,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
                        if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0)
                                return (VM_PAGER_OK);
                        atomic_setbits_int(&pg->pg_flags, PG_WANTED);
-                       uvmexp.fltpgwait++;
+                       counters_inc(uvmexp_counters, flt_pgwait);
 
                        /*
                         * the last unlock must be an atomic unlock+wait on
@@ -310,7 +311,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
 
                        if (pg == NULL) {               /* out of RAM.  */
                                uvmfault_unlockall(ufi, amap, NULL);
-                               uvmexp.fltnoram++;
+                               counters_inc(uvmexp_counters, flt_noram);
                                uvm_wait("flt_noram1");
                                /* ready to relock and try again */
                        } else {
@@ -325,7 +326,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
                                 * it is ok to read an_swslot here because
                                 * we hold PG_BUSY on the page.
                                 */
-                               uvmexp.pageins++;
+                               counters_inc(uvmexp_counters, pageins);
                                result = uvm_swap_get(pg, anon->an_swslot,
                                    PGO_SYNCIO);
 
@@ -369,7 +370,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
                                uvm_anfree(anon);       /* frees page for us */
                                if (locked)
                                        uvmfault_unlockall(ufi, amap, NULL);
-                               uvmexp.fltpgrele++;
+                               counters_inc(uvmexp_counters, flt_pgrele);
                                return (VM_PAGER_REFAULT);      /* refault! */
                        }
 
@@ -426,7 +427,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
                }
 
                /* try it again! */
-               uvmexp.fltanretry++;
+               counters_inc(uvmexp_counters, flt_anretry);
                continue;
 
        } /* while (1) */
@@ -547,7 +548,7 @@ uvm_fault_check(struct uvm_faultinfo *uf
                        /* need to clear */
                        uvmfault_unlockmaps(ufi, FALSE);
                        uvmfault_amapcopy(ufi);
-                       uvmexp.fltamcopy++;
+                       counters_inc(uvmexp_counters, flt_amcopy);
                        return (ERESTART);
                } else {
                        /*
@@ -699,7 +700,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
         */
 
        if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
-               uvmexp.flt_acow++;
+               counters_inc(uvmexp_counters, flt_acow);
                oanon = anon;           /* oanon = old */
                anon = uvm_analloc();
                if (anon) {
@@ -710,10 +711,10 @@ uvm_fault_upper(struct uvm_faultinfo *uf
                if (anon == NULL || pg == NULL) {
                        uvmfault_unlockall(ufi, amap, NULL);
                        if (anon == NULL)
-                               uvmexp.fltnoanon++;
+                               counters_inc(uvmexp_counters, flt_noanon);
                        else {
                                uvm_anfree(anon);
-                               uvmexp.fltnoram++;
+                               counters_inc(uvmexp_counters, flt_noram);
                        }
 
                        if (uvm_swapisfull())
@@ -745,7 +746,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
                 * thus, no one can get at it until we are done with it.
                 */
        } else {
-               uvmexp.flt_anon++;
+               counters_inc(uvmexp_counters, flt_anon);
                oanon = anon;
                pg = anon->an_page;
                if (anon->an_ref > 1)     /* disallow writes to ref > 1 anons */
@@ -861,7 +862,7 @@ uvm_fault_upper_lookup(struct uvm_faulti
                        uvm_lock_pageq();
                        uvm_pageactivate(anon->an_page);        /* reactivate */
                        uvm_unlock_pageq();
-                       uvmexp.fltnamap++;
+                       counters_inc(uvmexp_counters, flt_namap);
 
                        /*
                         * Since this isn't the page that's actually faulting,
@@ -909,7 +910,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
        struct vm_page *pages[UVM_MAXRANGE];
        int error = ERESTART;
 
-       uvmexp.faults++;        /* XXX: locking? */
+       counters_inc(uvmexp_counters, faults);
        TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
 
        /* init the IN parameters in the ufi */
@@ -994,7 +995,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
         * ("get" has the option of doing a pmap_enter for us)
         */
        if (uobj != NULL) {
-               uvmexp.fltlget++;
+               counters_inc(uvmexp_counters, flt_lget);
                gotpages = flt->npages;
                (void) uobj->pgops->pgo_get(uobj, ufi->entry->offset +
                                (flt->startva - ufi->entry->start),
@@ -1038,7 +1039,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
                                uvm_lock_pageq();
                                uvm_pageactivate(pages[lcv]);   /* reactivate */
                                uvm_unlock_pageq();
-                               uvmexp.fltnomap++;
+                               counters_inc(uvmexp_counters, flt_nomap);
 
                                /*
                                 * Since this page isn't the page that's
@@ -1109,7 +1110,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 
                uvmfault_unlockall(ufi, amap, NULL);
 
-               uvmexp.fltget++;
+               counters_inc(uvmexp_counters, flt_get);
                gotpages = 1;
                uoff = (ufi->orig_rvaddr - ufi->entry->start) + 
ufi->entry->offset;
                result = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
@@ -1183,7 +1184,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
                 *
                 * set "pg" to the page we want to map in (uobjpage, usually)
                 */
-               uvmexp.flt_obj++;
+               counters_inc(uvmexp_counters, flt_obj);
                if (UVM_ET_ISCOPYONWRITE(ufi->entry))
                        flt->enter_prot &= ~PROT_WRITE;
                pg = uobjpage;          /* map in the actual object */
@@ -1235,10 +1236,10 @@ uvm_fault_lower(struct uvm_faultinfo *uf
                        /* unlock and fail ... */
                        uvmfault_unlockall(ufi, amap, uobj);
                        if (anon == NULL)
-                               uvmexp.fltnoanon++;
+                               counters_inc(uvmexp_counters, flt_noanon);
                        else {
                                uvm_anfree(anon);
-                               uvmexp.fltnoram++;
+                               counters_inc(uvmexp_counters, flt_noram);
                        }
 
                        if (uvm_swapisfull())
@@ -1254,7 +1255,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 
                /* fill in the data */
                if (uobjpage != PGO_DONTCARE) {
-                       uvmexp.flt_prcopy++;
+                       counters_inc(uvmexp_counters, flt_prcopy);
                        /* copy page [pg now dirty] */
                        uvm_pagecopy(uobjpage, pg);
 
@@ -1277,7 +1278,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
                        uvm_unlock_pageq();
                        uobj = NULL;
                } else {
-                       uvmexp.flt_przero++;
+                       counters_inc(uvmexp_counters, flt_przero);
                        /*
                         * Page is zero'd and marked dirty by uvm_pagealloc()
                         * above.
@@ -1288,7 +1289,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
                    ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
                        uvmfault_unlockall(ufi, amap, NULL);
                        uvm_anfree(anon);
-                       uvmexp.fltnoamap++;
+                       counters_inc(uvmexp_counters, flt_noamap);
 
                        if (uvm_swapisfull())
                                return (ENOMEM);
@@ -1580,7 +1581,7 @@ uvmfault_relock(struct uvm_faultinfo *uf
                return TRUE;
        }
 
-       uvmexp.fltrelck++;
+       counters_inc(uvmexp_counters, flt_relck);
 
        /*
         * relock map.   fail if version mismatch (in which case nothing
@@ -1592,6 +1593,6 @@ uvmfault_relock(struct uvm_faultinfo *uf
                return(FALSE);
        }
 
-       uvmexp.fltrelckok++;
+       counters_inc(uvmexp_counters, flt_relckok);
        return(TRUE);           /* got it! */
 }
Index: uvm/uvm_init.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_init.c,v
retrieving revision 1.40
diff -u -p -r1.40 uvm_init.c
--- uvm/uvm_init.c      11 May 2017 00:42:05 -0000      1.40
+++ uvm/uvm_init.c      21 Dec 2020 19:37:13 -0000
@@ -35,6 +35,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/filedesc.h>
+#include <sys/percpu.h>
 #include <sys/resourcevar.h>
 #include <sys/mman.h>
 #include <sys/malloc.h>
@@ -52,6 +53,9 @@
 struct uvm uvm;                /* decl */
 struct uvmexp uvmexp;  /* decl */
 
+COUNTERS_BOOT_MEMORY(uvmexp_countersboot, exp_ncounters);
+struct cpumem *uvmexp_counters = 
COUNTERS_BOOT_INITIALIZER(uvmexp_countersboot);
+
 #if defined(VM_MIN_KERNEL_ADDRESS)
 vaddr_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS;
 #else
@@ -184,4 +188,10 @@ uvm_init(void)
            uaddr_bestfit_create(vm_map_min(kmem_map),
            vm_map_max(kmem_map)));
 #endif /* !SMALL_KERNEL */
+}
+
+void
+uvm_init_percpu(void)
+{
+       uvmexp_counters = counters_alloc_ncpus(uvmexp_counters, exp_ncounters);
 }
Index: uvm/uvm_meter.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.41
diff -u -p -r1.41 uvm_meter.c
--- uvm/uvm_meter.c     24 Jun 2020 22:03:45 -0000      1.41
+++ uvm/uvm_meter.c     21 Dec 2020 19:37:13 -0000
@@ -39,6 +39,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/percpu.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
@@ -178,9 +179,42 @@ uvm_sysctl(int *name, u_int namelen, voi
                return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
                    sizeof(vmtotals)));
 
-       case VM_UVMEXP:
-               return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp,
-                   sizeof(uvmexp)));
+       case VM_UVMEXP: {
+               struct uvmexp uexp;
+               uint64_t counters[exp_ncounters];
+
+               memcpy(&uexp, &uvmexp, sizeof(uexp));
+
+               counters_read(uvmexp_counters, counters, exp_ncounters);
+
+               /* stat counters */
+               uexp.faults = (int)counters[faults];
+               uexp.pageins = (int)counters[pageins];
+
+               /* fault subcounters */
+               uexp.fltnoram = (int)counters[flt_noram];
+               uexp.fltnoanon = (int)counters[flt_noanon];
+               uexp.fltnoamap = (int)counters[flt_noamap];
+               uexp.fltpgwait = (int)counters[flt_pgwait];
+               uexp.fltpgrele = (int)counters[flt_pgrele];
+               uexp.fltrelck = (int)counters[flt_relck];
+               uexp.fltrelckok = (int)counters[flt_relckok];
+               uexp.fltanget = (int)counters[flt_anget];
+               uexp.fltanretry = (int)counters[flt_anretry];
+               uexp.fltamcopy = (int)counters[flt_amcopy];
+               uexp.fltnamap = (int)counters[flt_namap];
+               uexp.fltnomap = (int)counters[flt_nomap];
+               uexp.fltlget = (int)counters[flt_lget];
+               uexp.fltget = (int)counters[flt_get];
+               uexp.flt_anon = (int)counters[flt_anon];
+               uexp.flt_acow = (int)counters[flt_acow];
+               uexp.flt_obj = (int)counters[flt_obj];
+               uexp.flt_prcopy = (int)counters[flt_prcopy];
+               uexp.flt_przero = (int)counters[flt_przero];
+
+               return (sysctl_rdstruct(oldp, oldlenp, newp, &uexp,
+                   sizeof(uexp)));
+       }
 
        case VM_NKMEMPAGES:
                return (sysctl_rdint(oldp, oldlenp, newp, nkmempages));
Index: uvm/uvmexp.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvmexp.h,v
retrieving revision 1.7
diff -u -p -r1.7 uvmexp.h
--- uvm/uvmexp.h        14 Dec 2020 13:29:18 -0000      1.7
+++ uvm/uvmexp.h        21 Dec 2020 19:37:13 -0000
@@ -156,4 +156,41 @@ struct _ps_strings {
        void    *val;
 };
 
+#ifdef _KERNEL
+
+/*
+ * Per-cpu UVM counters.
+ */
+extern struct cpumem *uvmexp_counters;
+
+enum uvm_exp_counters {
+       /* stat counters */
+       faults,         /* page fault count */
+       pageins,        /* pagein operation count */
+
+       /* fault subcounters */
+       flt_noram,      /* number of times fault was out of ram */
+       flt_noanon,     /* number of times fault was out of anons */
+       flt_noamap,     /* number of times fault was out of amap chunks */
+       flt_pgwait,     /* number of times fault had to wait on a page */
+       flt_pgrele,     /* number of times fault found a released page */
+       flt_relck,      /* number of times fault relock called */
+       flt_relckok,    /* number of times fault relock is a success */
+       flt_anget,      /* number of times fault gets anon page */
+       flt_anretry,    /* number of times fault retrys an anon get */
+       flt_amcopy,     /* number of times fault clears "needs copy" */
+       flt_namap,      /* number of times fault maps a neighbor anon page */
+       flt_nomap,      /* number of times fault maps a neighbor obj page */
+       flt_lget,       /* number of times fault does a locked pgo_get */
+       flt_get,        /* number of times fault does an unlocked get */
+       flt_anon,       /* number of times fault anon (case 1a) */
+       flt_acow,       /* number of times fault anon cow (case 1b) */
+       flt_obj,        /* number of times fault is on object page (2a) */
+       flt_prcopy,     /* number of times fault promotes with copy (2b) */
+       flt_przero,     /* number of times fault promotes with zerofill (2b) */
+
+       exp_ncounters
+};
+
+#endif /* _KERNEL */
 #endif /*_UVM_UVMEXP_ */

Reply via email to