On Wed, May 09, 2018 at 01:36:08PM +0200, Martin Pieuchot wrote:
> On 08/05/18(Tue) 18:03, Martin Pieuchot wrote:
> > Diff below, gypped from jmatthew@, allows us to see how much time CPUs
> > spend spinning on a mutex or on the KERNEL_LOCK().
> > 
> > On my sparc64 with 16 CPUs, top(1) now reports:
> > 
> > 16  CPUs: 16.8% user,  0.0% nice,  9.2% sys, 49.4% spin,  0.1% intr, 24.5% 
> > idle
> > 
> > And systat(1) CPU view:
> > 
> > CPU            User         Nice      System        Spin   Interrupt        
> > Idle
> > 0              3.9%         0.0%        3.9%       74.5%        2.0%       
> > 15.7%
> > 1              5.9%         0.0%       21.6%       72.5%        0.0%        
> > 0.0%
> > 2              9.8%         0.0%        5.9%       84.3%        0.0%        
> > 0.0%
> > 3              5.9%         0.0%        9.8%       82.4%        0.0%        
> > 2.0%
> > ...
> > 15             2.0%         0.0%       13.7%       56.9%        0.0%       
> > 27.5%
> > 
> > To keep it simple, systat(1) vmstat view now shows the spinning time instead
> > of the nice time.
> > 
> > I'd like to put something like that in, to be able to better understand
> > profiling and performance analysis.
> 
> Updated diff that addresses issues pointed by bluhm@ and visa@:
> 
> - Count spinning time in interrupt handlers
> 
> - Move `ci_spinning' to the per-CPU scheduler state
> 
> - Increment/decrement `spc_spinning' to not corrupt the value if
>   spinning recursively in an interrupt
> 
> Many people support the idea so far, so ok?
> 

I like it, no objections here.

-ml

> Index: usr.bin/top/machine.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/top/machine.c,v
> retrieving revision 1.89
> diff -u -p -r1.89 machine.c
> --- usr.bin/top/machine.c     30 May 2017 06:01:30 -0000      1.89
> +++ usr.bin/top/machine.c     9 May 2018 10:44:50 -0000
> @@ -108,7 +108,7 @@ char *procstatenames[] = {
>  /* these are for detailing the cpu states */
>  int64_t *cpu_states;
>  char *cpustatenames[] = {
> -     "user", "nice", "system", "interrupt", "idle", NULL
> +     "user", "nice", "sys", "spin", "intr", "idle", NULL
>  };
>  
>  /* these are for detailing the memory statistics */
> Index: usr.bin/systat/vmstat.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/systat/vmstat.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 vmstat.c
> --- usr.bin/systat/vmstat.c   3 May 2018 07:49:18 -0000       1.83
> +++ usr.bin/systat/vmstat.c   9 May 2018 10:44:50 -0000
> @@ -268,7 +268,7 @@ labelkre(void)
>       mvprintw(GENSTATROW, GENSTATCOL, "   Csw   Trp   Sys   Int   Sof  Flt");
>  
>       mvprintw(GRAPHROW, GRAPHCOL,
> -         "    . %%Int    . %%Sys    . %%Usr    . %%Nic    . %%Idle");
> +         "    . %%Int    . %%Sys    . %%Usr    . %%Spn    . %%Idle");
>       mvprintw(PROCSROW, PROCSCOL, "Proc:r  d  s  w");
>       mvprintw(GRAPHROW + 1, GRAPHCOL,
>           "|    |    |    |    |    |    |    |    |    |    |");
> @@ -306,8 +306,8 @@ labelkre(void)
>       } while (0)
>  #define MAXFAIL 5
>  
> -static       char cpuchar[CPUSTATES] = { '|', '=', '>', '-', ' ' };
> -static       char cpuorder[CPUSTATES] = { CP_INTR, CP_SYS, CP_USER, CP_NICE, 
> CP_IDLE };
> +static       char cpuchar[] = { '|', '=', '>', '-', ' ' };
> +static       char cpuorder[] = { CP_INTR, CP_SYS, CP_USER, CP_SPIN, CP_IDLE 
> };
>  
>  void
>  showkre(void)
> @@ -367,7 +367,7 @@ showkre(void)
>       psiz = 0;
>       f2 = 0.0;
>  
> -     for (c = 0; c < CPUSTATES; c++) {
> +     for (c = 0; c < nitems(cpuorder); c++) {
>               i = cpuorder[c];
>               f1 = cputime(i);
>               f2 += f1;
> Index: usr.bin/systat/cpu.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/systat/cpu.c,v
> retrieving revision 1.5
> diff -u -p -r1.5 cpu.c
> --- usr.bin/systat/cpu.c      2 Jan 2016 20:02:40 -0000       1.5
> +++ usr.bin/systat/cpu.c      9 May 2018 10:44:50 -0000
> @@ -68,21 +68,23 @@ field_def fields_cpu[] = {
>       { "User", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
>       { "Nice", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
>       { "System", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
> +     { "Spin", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
>       { "Interrupt", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
>       { "Idle", 10, 20, 1, FLD_ALIGN_RIGHT, -1, 0, 0, 0 },
>  };
>  
>  #define FLD_CPU_CPU  FIELD_ADDR(fields_cpu, 0)
> -#define FLD_CPU_INT  FIELD_ADDR(fields_cpu, 1)
> -#define FLD_CPU_SYS  FIELD_ADDR(fields_cpu, 2)
> -#define FLD_CPU_USR  FIELD_ADDR(fields_cpu, 3)
> -#define FLD_CPU_NIC  FIELD_ADDR(fields_cpu, 4)
> -#define FLD_CPU_IDLE FIELD_ADDR(fields_cpu, 5)
> +#define FLD_CPU_USR  FIELD_ADDR(fields_cpu, 1)
> +#define FLD_CPU_NIC  FIELD_ADDR(fields_cpu, 2)
> +#define FLD_CPU_SYS  FIELD_ADDR(fields_cpu, 3)
> +#define FLD_CPU_SPIN FIELD_ADDR(fields_cpu, 4)
> +#define FLD_CPU_INT  FIELD_ADDR(fields_cpu, 5)
> +#define FLD_CPU_IDLE FIELD_ADDR(fields_cpu, 6)
>  
>  /* Define views */
>  field_def *view_cpu_0[] = {
> -     FLD_CPU_CPU,
> -     FLD_CPU_INT, FLD_CPU_SYS, FLD_CPU_USR, FLD_CPU_NIC, FLD_CPU_IDLE, NULL
> +     FLD_CPU_CPU, FLD_CPU_USR, FLD_CPU_NIC, FLD_CPU_SYS, FLD_CPU_SPIN,
> +     FLD_CPU_INT, FLD_CPU_IDLE, NULL
>  };
>  
>  /* Define view managers */
> @@ -232,11 +234,12 @@ initcpu(void)
>       do {                                                            \
>               if (cur >= dispstart && cur < end) {                    \
>                       print_fld_size(FLD_CPU_CPU, (v));               \
> -                     print_fld_percentage(FLD_CPU_INT, (cs[0]));     \
> -                     print_fld_percentage(FLD_CPU_SYS, (cs[1]));     \
> -                     print_fld_percentage(FLD_CPU_USR, (cs[2]));     \
> -                     print_fld_percentage(FLD_CPU_NIC, (cs[3]));     \
> -                     print_fld_percentage(FLD_CPU_IDLE, (cs[4]));    \
> +                     print_fld_percentage(FLD_CPU_USR, (cs[CP_USER]));\
> +                     print_fld_percentage(FLD_CPU_NIC, (cs[CP_NICE]));\
> +                     print_fld_percentage(FLD_CPU_SYS, (cs[CP_SYS]));\
> +                     print_fld_percentage(FLD_CPU_SPIN, (cs[CP_SPIN]));\
> +                     print_fld_percentage(FLD_CPU_INT, (cs[CP_INTR]));\
> +                     print_fld_percentage(FLD_CPU_IDLE, (cs[CP_IDLE]));      
> \
>                       end_line();                                     \
>               }                                                       \
>               if (++cur >= end)                                       \
> Index: sys/sys/sched.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sched.h,v
> retrieving revision 1.44
> diff -u -p -r1.44 sched.h
> --- sys/sys/sched.h   14 Dec 2017 23:21:04 -0000      1.44
> +++ sys/sys/sched.h   9 May 2018 10:51:22 -0000
> @@ -83,17 +83,20 @@
>  #define CP_USER              0
>  #define CP_NICE              1
>  #define CP_SYS               2
> -#define CP_INTR              3
> -#define CP_IDLE              4
> -#define CPUSTATES    5
> +#define CP_SPIN              3
> +#define CP_INTR              4
> +#define CP_IDLE              5
> +#define CPUSTATES    6
>  
>  #define      SCHED_NQS       32                      /* 32 run queues. */
>  
>  /*
>   * Per-CPU scheduler state.
> - * XXX - expose to userland for now.
>   */
>  struct schedstate_percpu {
> +     struct proc *spc_idleproc;      /* idle proc for this cpu */
> +     TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
> +     LIST_HEAD(,proc) spc_deadproc;
>       struct timespec spc_runtime;    /* time curproc started running */
>       volatile int spc_schedflags;    /* flags; see below */
>       u_int spc_schedticks;           /* ticks for schedclock() */
> @@ -102,18 +105,12 @@ struct schedstate_percpu {
>       int spc_rrticks;                /* ticks until roundrobin() */
>       int spc_pscnt;                  /* prof/stat counter */
>       int spc_psdiv;                  /* prof/stat divisor */ 
> -     struct proc *spc_idleproc;      /* idle proc for this cpu */
>  
>       u_int spc_nrun;                 /* procs on the run queues */
>       fixpt_t spc_ldavg;              /* shortest load avg. for this cpu */
>  
> -     TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
>       volatile uint32_t spc_whichqs;
> -
> -#ifdef notyet
> -     struct proc *spc_reaper;        /* dead proc reaper */
> -#endif
> -     LIST_HEAD(,proc) spc_deadproc;
> +     volatile u_int spc_spinning;    /* this cpu is currently spinning */
>  };
>  
>  #ifdef       _KERNEL
> Index: sys/kern/kern_lock.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_lock.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 kern_lock.c
> --- sys/kern/kern_lock.c      26 Apr 2018 06:51:48 -0000      1.63
> +++ sys/kern/kern_lock.c      9 May 2018 10:49:25 -0000
> @@ -113,10 +113,12 @@ ___mp_lock_init(struct __mp_lock *mpl, s
>  static __inline void
>  __mp_lock_spin(struct __mp_lock *mpl, u_int me)
>  {
> +     struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
>  #ifdef MP_LOCKDEBUG
>       int nticks = __mp_lock_spinout;
>  #endif
>  
> +     spc->spc_spinning++;
>       while (mpl->mpl_ticket != me) {
>               CPU_BUSY_CYCLE();
>  
> @@ -128,6 +130,7 @@ __mp_lock_spin(struct __mp_lock *mpl, u_
>               }
>  #endif
>       }
> +     spc->spc_spinning--;
>  }
>  
>  void
> @@ -257,10 +260,12 @@ __mtx_init(struct mutex *mtx, int wantip
>  void
>  __mtx_enter(struct mutex *mtx)
>  {
> +     struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
>  #ifdef MP_LOCKDEBUG
>       int nticks = __mp_lock_spinout;
>  #endif
>  
> +     spc->spc_spinning++;
>       while (__mtx_enter_try(mtx) == 0) {
>               CPU_BUSY_CYCLE();
>  
> @@ -272,6 +277,7 @@ __mtx_enter(struct mutex *mtx)
>               }
>  #endif
>       }
> +     spc->spc_spinning--;
>  }
>  
>  int
> Index: sys/kern/kern_clock.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_clock.c,v
> retrieving revision 1.93
> diff -u -p -r1.93 kern_clock.c
> --- sys/kern/kern_clock.c     22 Jul 2017 14:33:45 -0000      1.93
> +++ sys/kern/kern_clock.c     9 May 2018 10:56:24 -0000
> @@ -381,6 +381,7 @@ statclock(struct clockframe *frame)
>                       return;
>               /*
>                * Came from kernel mode, so we were:
> +              * - spinning on a lock
>                * - handling an interrupt,
>                * - doing syscall or trap work on behalf of the current
>                *   user process, or
> @@ -391,7 +392,9 @@ statclock(struct clockframe *frame)
>                * so that we know how much of its real time was spent
>                * in ``non-process'' (i.e., interrupt) work.
>                */
> -             if (CLKF_INTR(frame)) {
> +             if (spc->spc_spinning)
> +                     spc->spc_cp_time[CP_SPIN]++;
> +             else if (CLKF_INTR(frame)) {
>                       if (p != NULL)
>                               p->p_iticks++;
>                       spc->spc_cp_time[CP_INTR]++;
> 

Reply via email to