On 08/05/18(Tue) 18:03, Martin Pieuchot wrote:
> Diff below, borrowed from jmatthew@, allows us to see how much time CPUs
> spend spinning on a mutex or on the KERNEL_LOCK().
> 
> On my sparc64 with 16 CPUs, top(1) now reports:
> 
> 16  CPUs: 16.8% user,  0.0% nice,  9.2% sys, 49.4% spin,  0.1% intr, 24.5% 
> idle
> 
> And systat(1) CPU view:
> 
> CPU            User         Nice      System        Spin   Interrupt        
> Idle
> 0              3.9%         0.0%        3.9%       74.5%        2.0%       
> 15.7%
> 1              5.9%         0.0%       21.6%       72.5%        0.0%        
> 0.0%
> 2              9.8%         0.0%        5.9%       84.3%        0.0%        
> 0.0%
> 3              5.9%         0.0%        9.8%       82.4%        0.0%        
> 2.0%
> ...
> 15             2.0%         0.0%       13.7%       56.9%        0.0%       
> 27.5%
> 
> To keep it simple, systat(1) vmstat view now shows the spinning time instead
> of the nice time.
> 
> I'd like to put something like this in, to make profiling and
> performance analysis easier.

Updated diff that addresses issues pointed by bluhm@ and visa@:

- Count spinning time in interrupt handlers

- Move `ci_spinning' to the per-CPU scheduler state

- Increment/decrement `spc_spinning' to avoid corrupting the value if
  spinning recursively in an interrupt

Many people support the idea so far, so ok?

Index: usr.bin/top/machine.c
===================================================================
RCS file: /cvs/src/usr.bin/top/machine.c,v
retrieving revision 1.89
diff -u -p -r1.89 machine.c
--- usr.bin/top/machine.c       30 May 2017 06:01:30 -0000      1.89
+++ usr.bin/top/machine.c       9 May 2018 10:44:50 -0000
@@ -108,7 +108,7 @@ char *procstatenames[] = {
 /* these are for detailing the cpu states */
 int64_t *cpu_states;
 char *cpustatenames[] = {
-       "user", "nice", "system", "interrupt", "idle", NULL
+       "user", "nice", "sys", "spin", "intr", "idle", NULL
 };
 
 /* these are for detailing the memory statistics */
Index: usr.bin/systat/vmstat.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/vmstat.c,v
retrieving revision 1.83
diff -u -p -r1.83 vmstat.c
--- usr.bin/systat/vmstat.c     3 May 2018 07:49:18 -0000       1.83
+++ usr.bin/systat/vmstat.c     9 May 2018 10:44:50 -0000
@@ -268,7 +268,7 @@ labelkre(void)
        mvprintw(GENSTATROW, GENSTATCOL, "   Csw   Trp   Sys   Int   Sof  Flt");
 
        mvprintw(GRAPHROW, GRAPHCOL,
-           "    . %%Int    . %%Sys    . %%Usr    . %%Nic    . %%Idle");
+           "    . %%Int    . %%Sys    . %%Usr    . %%Spn    . %%Idle");
        mvprintw(PROCSROW, PROCSCOL, "Proc:r  d  s  w");
        mvprintw(GRAPHROW + 1, GRAPHCOL,
            "|    |    |    |    |    |    |    |    |    |    |");
@@ -306,8 +306,8 @@ labelkre(void)
        } while (0)
 #define MAXFAIL 5
 
-static char cpuchar[CPUSTATES] = { '|', '=', '>', '-', ' ' };
-static char cpuorder[CPUSTATES] = { CP_INTR, CP_SYS, CP_USER, CP_NICE, CP_IDLE 
};
+static char cpuchar[] = { '|', '=', '>', '-', ' ' };
+static char cpuorder[] = { CP_INTR, CP_SYS, CP_USER, CP_SPIN, CP_IDLE };
 
 void
 showkre(void)
@@ -367,7 +367,7 @@ showkre(void)
        psiz = 0;
        f2 = 0.0;
 
-       for (c = 0; c < CPUSTATES; c++) {
+       for (c = 0; c < nitems(cpuorder); c++) {
                i = cpuorder[c];
                f1 = cputime(i);
                f2 += f1;
Index: usr.bin/systat/cpu.c
===================================================================
RCS file: /cvs/src/usr.bin/systat/cpu.c,v
retrieving revision 1.5
diff -u -p -r1.5 cpu.c
--- usr.bin/systat/cpu.c        2 Jan 2016 20:02:40 -0000       1.5
+++ usr.bin/systat/cpu.c        9 May 2018 10:44:50 -0000
@@ -68,21 +68,23 @@ field_def fields_cpu[] = {
        { "User", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
        { "Nice", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
        { "System", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
+       { "Spin", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
        { "Interrupt", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
        { "Idle", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
 };
 
 #define FLD_CPU_CPU    FIELD_ADDR(fields_cpu, 0)
-#define FLD_CPU_INT    FIELD_ADDR(fields_cpu, 1)
-#define FLD_CPU_SYS    FIELD_ADDR(fields_cpu, 2)
-#define FLD_CPU_USR    FIELD_ADDR(fields_cpu, 3)
-#define FLD_CPU_NIC    FIELD_ADDR(fields_cpu, 4)
-#define FLD_CPU_IDLE   FIELD_ADDR(fields_cpu, 5)
+#define FLD_CPU_USR    FIELD_ADDR(fields_cpu, 1)
+#define FLD_CPU_NIC    FIELD_ADDR(fields_cpu, 2)
+#define FLD_CPU_SYS    FIELD_ADDR(fields_cpu, 3)
+#define FLD_CPU_SPIN   FIELD_ADDR(fields_cpu, 4)
+#define FLD_CPU_INT    FIELD_ADDR(fields_cpu, 5)
+#define FLD_CPU_IDLE   FIELD_ADDR(fields_cpu, 6)
 
 /* Define views */
 field_def *view_cpu_0[] = {
-       FLD_CPU_CPU,
-       FLD_CPU_INT, FLD_CPU_SYS, FLD_CPU_USR, FLD_CPU_NIC, FLD_CPU_IDLE, NULL
+       FLD_CPU_CPU, FLD_CPU_USR, FLD_CPU_NIC, FLD_CPU_SYS, FLD_CPU_SPIN,
+       FLD_CPU_INT, FLD_CPU_IDLE, NULL
 };
 
 /* Define view managers */
@@ -232,11 +234,12 @@ initcpu(void)
        do {                                                            \
                if (cur >= dispstart && cur < end) {                    \
                        print_fld_size(FLD_CPU_CPU, (v));               \
-                       print_fld_percentage(FLD_CPU_INT, (cs[0]));     \
-                       print_fld_percentage(FLD_CPU_SYS, (cs[1]));     \
-                       print_fld_percentage(FLD_CPU_USR, (cs[2]));     \
-                       print_fld_percentage(FLD_CPU_NIC, (cs[3]));     \
-                       print_fld_percentage(FLD_CPU_IDLE, (cs[4]));    \
+                       print_fld_percentage(FLD_CPU_USR, (cs[CP_USER]));\
+                       print_fld_percentage(FLD_CPU_NIC, (cs[CP_NICE]));\
+                       print_fld_percentage(FLD_CPU_SYS, (cs[CP_SYS]));\
+                       print_fld_percentage(FLD_CPU_SPIN, (cs[CP_SPIN]));\
+                       print_fld_percentage(FLD_CPU_INT, (cs[CP_INTR]));\
+                       print_fld_percentage(FLD_CPU_IDLE, (cs[CP_IDLE]));      
\
                        end_line();                                     \
                }                                                       \
                if (++cur >= end)                                       \
Index: sys/sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.44
diff -u -p -r1.44 sched.h
--- sys/sys/sched.h     14 Dec 2017 23:21:04 -0000      1.44
+++ sys/sys/sched.h     9 May 2018 10:51:22 -0000
@@ -83,17 +83,20 @@
 #define CP_USER                0
 #define CP_NICE                1
 #define CP_SYS         2
-#define CP_INTR                3
-#define CP_IDLE                4
-#define CPUSTATES      5
+#define CP_SPIN                3
+#define CP_INTR                4
+#define CP_IDLE                5
+#define CPUSTATES      6
 
 #define        SCHED_NQS       32                      /* 32 run queues. */
 
 /*
  * Per-CPU scheduler state.
- * XXX - expose to userland for now.
  */
 struct schedstate_percpu {
+       struct proc *spc_idleproc;      /* idle proc for this cpu */
+       TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
+       LIST_HEAD(,proc) spc_deadproc;
        struct timespec spc_runtime;    /* time curproc started running */
        volatile int spc_schedflags;    /* flags; see below */
        u_int spc_schedticks;           /* ticks for schedclock() */
@@ -102,18 +105,12 @@ struct schedstate_percpu {
        int spc_rrticks;                /* ticks until roundrobin() */
        int spc_pscnt;                  /* prof/stat counter */
        int spc_psdiv;                  /* prof/stat divisor */ 
-       struct proc *spc_idleproc;      /* idle proc for this cpu */
 
        u_int spc_nrun;                 /* procs on the run queues */
        fixpt_t spc_ldavg;              /* shortest load avg. for this cpu */
 
-       TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
        volatile uint32_t spc_whichqs;
-
-#ifdef notyet
-       struct proc *spc_reaper;        /* dead proc reaper */
-#endif
-       LIST_HEAD(,proc) spc_deadproc;
+       volatile u_int spc_spinning;    /* this cpu is currently spinning */
 };
 
 #ifdef _KERNEL
Index: sys/kern/kern_lock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_lock.c,v
retrieving revision 1.63
diff -u -p -r1.63 kern_lock.c
--- sys/kern/kern_lock.c        26 Apr 2018 06:51:48 -0000      1.63
+++ sys/kern/kern_lock.c        9 May 2018 10:49:25 -0000
@@ -113,10 +113,12 @@ ___mp_lock_init(struct __mp_lock *mpl, s
 static __inline void
 __mp_lock_spin(struct __mp_lock *mpl, u_int me)
 {
+       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
 #ifdef MP_LOCKDEBUG
        int nticks = __mp_lock_spinout;
 #endif
 
+       spc->spc_spinning++;
        while (mpl->mpl_ticket != me) {
                CPU_BUSY_CYCLE();
 
@@ -128,6 +130,7 @@ __mp_lock_spin(struct __mp_lock *mpl, u_
                }
 #endif
        }
+       spc->spc_spinning--;
 }
 
 void
@@ -257,10 +260,12 @@ __mtx_init(struct mutex *mtx, int wantip
 void
 __mtx_enter(struct mutex *mtx)
 {
+       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
 #ifdef MP_LOCKDEBUG
        int nticks = __mp_lock_spinout;
 #endif
 
+       spc->spc_spinning++;
        while (__mtx_enter_try(mtx) == 0) {
                CPU_BUSY_CYCLE();
 
@@ -272,6 +277,7 @@ __mtx_enter(struct mutex *mtx)
                }
 #endif
        }
+       spc->spc_spinning--;
 }
 
 int
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.93
diff -u -p -r1.93 kern_clock.c
--- sys/kern/kern_clock.c       22 Jul 2017 14:33:45 -0000      1.93
+++ sys/kern/kern_clock.c       9 May 2018 10:56:24 -0000
@@ -381,6 +381,7 @@ statclock(struct clockframe *frame)
                        return;
                /*
                 * Came from kernel mode, so we were:
+                * - spinning on a lock
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
@@ -391,7 +392,9 @@ statclock(struct clockframe *frame)
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
-               if (CLKF_INTR(frame)) {
+               if (spc->spc_spinning)
+                       spc->spc_cp_time[CP_SPIN]++;
+               else if (CLKF_INTR(frame)) {
                        if (p != NULL)
                                p->p_iticks++;
                        spc->spc_cp_time[CP_INTR]++;

Reply via email to