Back in the old CSRG times, in the 80s, before the introduction of
PCATCH and tsleep(9), PZERO was a clever hack. The following comment
from sleep() in kern/kern_synch.c 3.1 explains it well:
* The most important effect of pri is that when
* pri<=PZERO a signal cannot disturb the sleep;
* if pri>PZERO signals will be processed.
The reason for such distinction is better explained by a comment in
sys/vmmeter.h, still present in OpenBSD:
u_int16_t t_dw; /* jobs in ``disk wait'' (neg priority) */
In 2017, "neg priority" translates to a value of ``p_priority'' between
0 and PZERO. Back in the old times they were used for "short sleep".
That's why the original getloadavg(3) calculation always included processes
doing a short sleep in the number of running processes:
if (p->p_pri < PZERO)
nrun++;
That's why until today we can see a lot of (PZERO + 1) or (PZERO - 1) in
the tree.
In the 90s everything changed. tsleep(9) and PCATCH arrived to create
more confusion about what a sleeping priority really means! In the
version 7.7 of vm/vm_meter.c mckusick@ changed how running processes
were accounted:
if (p->p_pri <= PZERO && p->p_slptime == 0)
nrun++;
The ``p_slptime'' check means that long sleeping processes (> 1sec) were
no longer included in getloadavg(3). So in 1990 the kernel already had
processes sleeping for a "long" time with a priority <= PZERO! PZERO
was already a lie...
The diff below gets rid of this lie. Stop counting sleeping processes
as "running". There's no way to know if the process is sleeping for a
short period or not. This eliminates some false positives, for example
when the idling thread or softclock thread are accounted as running.
It also simplifies uvm_total() to just report the number of sleeping
threads.
While here, update vmstat(8) to reflect this change and make it fit in
80 chars. Here's the new output:
procs memory page disks traps cpu
r s avm fre flt re pi po fr sr sd0 sd1 int sys cs us sy id
1 40 922M 3588M 3466 0 0 0 0 0 50 50 353 12810 1314 5 4 91
Index: sys/uvm/uvm_meter.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.36
diff -u -p -r1.36 uvm_meter.c
--- sys/uvm/uvm_meter.c 14 Mar 2015 03:38:53 -0000 1.36
+++ sys/uvm/uvm_meter.c 30 Apr 2017 16:37:08 -0000
@@ -108,14 +108,14 @@ uvm_loadav(struct loadavg *avg)
LIST_FOREACH(p, &allproc, p_list) {
switch (p->p_stat) {
+ case SSTOP:
case SSLEEP:
- if (p->p_priority > PZERO || p->p_slptime > 1)
- continue;
- /* FALLTHROUGH */
+ break;
case SRUN:
case SONPROC:
if (p == p->p_cpu->ci_schedstate.spc_idleproc)
continue;
+ /* FALLTHROUGH */
case SIDL:
nrun++;
if (p->p_cpu)
@@ -136,7 +136,7 @@ uvm_loadav(struct loadavg *avg)
spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *
(FSCALE - cexp[0])) >> FSHIFT;
- }
+ }
}
/*
@@ -254,24 +254,19 @@ uvm_total(struct vmtotal *totalp)
/* calculate process statistics */
LIST_FOREACH(p, &allproc, p_list) {
- if (p->p_flag & P_SYSTEM)
- continue;
switch (p->p_stat) {
case 0:
continue;
case SSLEEP:
case SSTOP:
- if (p->p_priority <= PZERO)
- totalp->t_dw++;
- else if (p->p_slptime < maxslp)
- totalp->t_sl++;
- if (p->p_slptime >= maxslp)
- continue;
+ totalp->t_sl++;
break;
case SRUN:
- case SIDL:
case SONPROC:
+ if (p == p->p_cpu->ci_schedstate.spc_idleproc)
+ continue;
+ case SIDL:
totalp->t_rq++;
if (p->p_stat == SIDL)
continue;
Index: usr.bin/vmstat/vmstat.8
===================================================================
RCS file: /cvs/src/usr.bin/vmstat/vmstat.8,v
retrieving revision 1.37
diff -u -p -r1.37 vmstat.8
--- usr.bin/vmstat/vmstat.8 30 Mar 2016 06:58:06 -0000 1.37
+++ usr.bin/vmstat/vmstat.8 30 Apr 2017 17:24:17 -0000
@@ -121,16 +121,11 @@ Information about the numbers of process
.Bl -tag -width 4n -compact
.It Li r
in run queue
-.It Li b
-blocked for resources (I/O, paging, etc.)
-.It Li w
-runnable or short sleeper (< 20 secs) but swapped
+.It Li s
+sleeping
.El
.It Li memory
Information about the usage of virtual and real memory.
-Virtual pages
-(reported in units of 1024 bytes) are considered active if they belong
-to processes which are running or have run in the last 20 seconds.
.Pp
.Bl -tag -width 4n -compact
.It Li avm
Index: usr.bin/vmstat/vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/vmstat/vmstat.c,v
retrieving revision 1.141
diff -u -p -r1.141 vmstat.c
--- usr.bin/vmstat/vmstat.c 14 Aug 2016 22:47:26 -0000 1.141
+++ usr.bin/vmstat/vmstat.c 30 Apr 2017 17:20:51 -0000
@@ -358,13 +358,12 @@ dovmstat(u_int interval, int reps)
warn("could not read vm.vmmeter");
memset(&total, 0, sizeof(total));
}
- (void)printf(" %u %u %u ",
- total.t_rq - 1, total.t_dw + total.t_pw, total.t_sw);
+ (void)printf("%2u %3u", total.t_rq - 1, total.t_sl);
#define rate(x) ((unsigned)((((unsigned)x) + halfuptime) / uptime)) /*
round */
#define pgtok(a) ((a) * ((unsigned int)uvmexp.pagesize >> 10))
- (void)printf("%6u %7u ",
- pgtok(uvmexp.active + uvmexp.swpginuse),
- pgtok(uvmexp.free));
+ (void)printf("%5uM %6uM ",
+ pgtok(uvmexp.active + uvmexp.swpginuse) / 1024,
+ pgtok(uvmexp.free) / 1024);
(void)printf("%4u ", rate(uvmexp.faults - ouvmexp.faults));
(void)printf("%3u ", rate(uvmexp.pdreact - ouvmexp.pdreact));
(void)printf("%3u ", rate(uvmexp.pageins - ouvmexp.pageins));
@@ -410,7 +409,7 @@ printhdr(void)
(void)printf("%*s traps cpu\n",
ndrives * 3, "");
- (void)printf(" r b w avm fre flt re pi po fr sr ");
+ (void)printf(" r s avm fre flt re pi po fr sr ");
for (i = 0; i < dk_ndrive; i++)
if (dk_select[i])
(void)printf("%c%c%c ", dr_name[i][0],