Hi,

systat(1)'s vmstat view displays rates for things like interrupts.
Strangely, it uses CPU time to compute these rates, not real time.

This is potentially misleading, particularly on an MP system.  If I
have 4 cores running on a HZ=100 kernel I expect ~400 clock interrupts
per second.  But systat(1) tells me I have 100 because we have 4
seconds worth of CPU time for every second of real time that elapses.

I don't like it.  I want to see how many interrupts there really were.

This patch changes the vmstat view to use CLOCK_UPTIME to measure
elapsed time and uses that when computing rates.  The "Big Bar" is
still drawn using CPU time, but for everything else I think you would
want a rate based on the elapsed real time.  Using CPU time isn't
intuitive.

We want CLOCK_UPTIME, not CLOCK_MONOTONIC, because we aren't
interested in what the machine was doing when it was suspended.

I have not changed dinfo() to keep the patch simple, but we should be
using CLOCK_UPTIME there, too.

Thoughts?

Index: vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/vmstat.c,v
retrieving revision 1.91
diff -u -p -r1.91 vmstat.c
--- vmstat.c    28 Jun 2019 13:35:04 -0000      1.91
+++ vmstat.c    16 Sep 2020 03:56:14 -0000
@@ -320,7 +320,7 @@ labelkre(void)
 #define PUTRATE(fld, l, c, w) \
        do { \
                Y(fld); \
-               putint((int)((float)s.fld/etime + 0.5), l, c, w); \
+               putint((int)((float)s.fld / eruntime + 0.5), l, c, w); \
        } while (0)
 #define MAXFAIL 5
 
@@ -330,12 +330,18 @@ static    char cpuorder[] = { CP_INTR, CP_S
 void
 showkre(void)
 {
+       static struct timespec prev;
+       struct timespec elapsed, now;
        float f1, f2;
        int psiz;
        u_int64_t inttotal, intcnt;
        int i, l, c;
        static int failcnt = 0, first_run = 0;
-       double etime;
+       double ecputime, eruntime;
+
+       clock_gettime(CLOCK_UPTIME, &now);
+       timespecsub(&now, &prev, &elapsed);
+       prev = now;
 
        if (state == TIME) {
                if (!first_run) {
@@ -343,12 +349,13 @@ showkre(void)
                        return;
                }
        }
-       etime = 0;
+       eruntime = elapsed.tv_sec + elapsed.tv_nsec / 1000000000.0;
+       ecputime = 0;
        for (i = 0; i < CPUSTATES; i++) {
                X(cpustats.cs_time);
-               etime += s.cpustats.cs_time[i];
+               ecputime += s.cpustats.cs_time[i];
        }
-       if (etime < 5.0) {      /* < 5 ticks - ignore this trash */
+       if (ecputime < 5.0) {   /* < 5 ticks - ignore this trash */
                if (failcnt++ >= MAXFAIL) {
                        error("The alternate system clock has died!");
                        failcnt = 0;
@@ -356,12 +363,12 @@ showkre(void)
                return;
        }
        failcnt = 0;
-       etime /= hertz;
+       ecputime /= hertz;
        inttotal = 0;
        for (i = 0; i < nintr; i++) {
                t = intcnt = s.intrcnt[i];
                s.intrcnt[i] -= s1.intrcnt[i];
-               intcnt = (u_int64_t)((float)s.intrcnt[i]/etime + 0.5);
+               intcnt = (u_int64_t)((float)s.intrcnt[i] / eruntime + 0.5);
                inttotal += intcnt;
                if (intrloc[i] != 0)
                        putuint64(intcnt, intrloc[i], INTSCOL, 8);

Reply via email to