Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Christiano F. Haesbaert
Moving this to tech@

Hi,

I was studying the scheduler code after watching tedu's talk, and I'd like
to expose these statistics to userland so that I can try playing with
cache affinity in the future:

gimli:src: sysctl kern.schedstat

  
kern.schedstat.nmigrations=23744
kern.schedstat.noidle=0
kern.schedstat.stolen=9170
kern.schedstat.choose=834843
kern.schedstat.wasidle=808711
kern.schedstat.nomigrations=2388

Opinions ?

Index: sys/sys//sched.h
===
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.29
diff -d -u -p -w -r1.29 sched.h
--- sys/sys//sched.h   7 Jul 2011 18:00:33 -   1.29
+++ sys/sys//sched.h   12 Nov 2011 13:51:04 -
@@ -75,6 +75,34 @@
  * Posix defines a sched.h which may want to include sys/sched.h
  */
 
+struct schedstat {
+   u_int64_t scs_nmigrations;
+   u_int64_t scs_noidle;
+   u_int64_t scs_stolen;
+
+   u_int64_t scs_choose;
+   u_int64_t scs_wasidle;
+   u_int64_t scs_nomigrations;
+};
+
+/* These sysctl names are only really used by sysctl(8) */
+#define KERN_SCHEDSTAT_NMIGRATIONS 1
+#define KERN_SCHEDSTAT_NOIDLE  2
+#define KERN_SCHEDSTAT_STOLEN  3
+#define KERN_SCHEDSTAT_CHOOSE  4
+#define KERN_SCHEDSTAT_WASIDLE 5
+#define KERN_SCHEDSTAT_NOMIGRATIONS 6
+#define KERN_SCHEDSTAT_MAXID   7
+
+#define CTL_KERN_SCHEDSTAT_NAMES { \
+   { 0, 0 },   \
+   { "nmigrations", CTLTYPE_QUAD },\
+   { "noidle", CTLTYPE_QUAD }, \
+   { "stolen", CTLTYPE_QUAD }, \
+   { "choose", CTLTYPE_QUAD }, \
+   { "wasidle", CTLTYPE_QUAD },\
+   { "nomigrations", CTLTYPE_QUAD }\
+}
 /*
  * CPU states.
  * XXX Not really scheduler state, but no other good place to put
Index: sys/sys//sysctl.h
===
RCS file: /cvs/src/sys/sys/sysctl.h,v
retrieving revision 1.117
diff -d -u -p -w -r1.117 sysctl.h
--- sys/sys//sysctl.h   30 Aug 2011 01:09:29 -  1.117
+++ sys/sys//sysctl.h   12 Nov 2011 13:40:45 -
@@ -189,7 +189,8 @@ struct ctlname {
 #define KERN_CONSDEV        75  /* dev_t: console terminal device */
 #define KERN_NETLIVELOCKS   76  /* int: number of network livelocks */
 #define KERN_POOL_DEBUG     77  /* int: enable pool_debug */
-#define KERN_MAXID          78  /* number of valid kern ids */
+#define KERN_SCHEDSTAT      78  /* struct: sched statistics */
+#define KERN_MAXID          79  /* number of valid kern ids */
 
 #define CTL_KERN_NAMES { \
{ 0, 0 }, \
@@ -270,6 +271,7 @@ struct ctlname {
{ "consdev", CTLTYPE_STRUCT }, \
{ "netlivelocks", CTLTYPE_INT }, \
{ "pool_debug", CTLTYPE_INT }, \
+   { "schedstat", CTLTYPE_STRUCT }, \
 }
 
 /*
Index: sys/kern//kern_sched.c
===
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.24
diff -d -u -p -w -r1.24 kern_sched.c
--- sys/kern//kern_sched.c  12 Oct 2011 18:30:09 -  1.24
+++ sys/kern//kern_sched.c  12 Nov 2011 14:41:59 -
@@ -35,6 +35,8 @@ void sched_kthreads_create(void *);
 int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
 struct proc *sched_steal_proc(struct cpu_info *);
 
+struct schedstat schedstat;
+
 /*
  * To help choosing which cpu should run which process we keep track
  * of cpus which are currently idle and which cpus have processes
@@ -301,14 +303,6 @@ again:
return (p); 
 }
 
-uint64_t sched_nmigrations;
-uint64_t sched_noidle;
-uint64_t sched_stolen;
-
-uint64_t sched_choose;
-uint64_t sched_wasidle;
-uint64_t sched_nomigrations;
-
 struct cpu_info *
 sched_choosecpu_fork(struct proc *parent, int flags)
 {
@@ -374,7 +368,7 @@ sched_choosecpu(struct proc *p)
if (p->p_flag & P_CPUPEG)
return (p->p_cpu);
 
-   sched_choose++;
+   schedstat.scs_choose++;
 
/*
 * Look at all cpus that are currently idle and have nothing queued.
@@ -393,7 +387,7 @@ sched_choosecpu(struct proc *p)
if (cpuset_isset(&set, p->p_cpu) ||
(p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
curproc == p)) {
-   sched_wasidle++;
+   schedstat.scs_wasidle++;
return (p->p_cpu);
}
 
@@ -411,9 +405,9 @@ sched_choosecpu(struct proc *p)
}
 
if (p->p_cpu != choice)
-   sched_nmigrations++;
+   schedstat.scs_nmigrations++;
else
-   sched_nomigrations++;
+   schedstat.scs_nomigrations++;
 
return (choice);
 }
@@ -461,7 +455,7 @@ sched_steal_proc(struct cpu_info *self)
 

Re: Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Thordur Bjornsson
On 2011 Nov 17 (Thu) at 21:18:24 -0200 (-0200), Christiano F. Haesbaert wrote:
 Moving this to tech@
 
 Hi,
 
 I was studying the scheduler code after watching tedu's talk, I'd like
 to expose this statistics to userland so that I can try playing with
 cache affinity in the future:
 
 gimli:src: sysctl kern.schedstat  
   
   
 kern.schedstat.nmigrations=23744
 kern.schedstat.noidle=0
 kern.schedstat.stolen=9170
 kern.schedstat.choose=834843
 kern.schedstat.wasidle=808711
 kern.schedstat.nomigrations=2388
 
 Opinions ?

I see no point in exporting this out. They are essentially pointless
knobs that people _will_ fiddle with without a clue.

This belongs in a developers tree IMO.

 
 Index: sys/sys//sched.h
 ===
 RCS file: /cvs/src/sys/sys/sched.h,v
 retrieving revision 1.29
 diff -d -u -p -w -r1.29 sched.h
 --- sys/sys//sched.h  7 Jul 2011 18:00:33 -   1.29
 +++ sys/sys//sched.h  12 Nov 2011 13:51:04 -
 @@ -75,6 +75,34 @@
   * Posix defines a sched.h which may want to include sys/sched.h
   */
  
 +struct schedstat {
 + u_int64_t scs_nmigrations;
 + u_int64_t scs_noidle;
 + u_int64_t scs_stolen;
 +
 + u_int64_t scs_choose;
 + u_int64_t scs_wasidle;
 + u_int64_t scs_nomigrations;
 +};
 +
 +/* These sysctl names are only really used by sysctl(8) */
 +#define KERN_SCHEDSTAT_NMIGRATIONS   1
 +#define KERN_SCHEDSTAT_NOIDLE2
 +#define KERN_SCHEDSTAT_STOLEN3
 +#define KERN_SCHEDSTAT_CHOOSE4
 +#define KERN_SCHEDSTAT_WASIDLE   5
 +#define KERN_SCHEDSTAT_NOMIGRATIONS  6
 +#define KERN_SCHEDSTAT_MAXID 7
 +
 +#define CTL_KERN_SCHEDSTAT_NAMES {   \
 + { 0, 0 },   \
 + { nmigrations, CTLTYPE_QUAD },\
 + { noidle, CTLTYPE_QUAD }, \
 + { stolen, CTLTYPE_QUAD }, \
 + { choose, CTLTYPE_QUAD }, \
 + { wasidle, CTLTYPE_QUAD },\
 + { nomigrations, CTLTYPE_QUAD }\
 +}
  /*
   * CPU states.
   * XXX Not really scheduler state, but no other good place to put
 Index: sys/sys//sysctl.h
 ===
 RCS file: /cvs/src/sys/sys/sysctl.h,v
 retrieving revision 1.117
 diff -d -u -p -w -r1.117 sysctl.h
 --- sys/sys//sysctl.h 30 Aug 2011 01:09:29 -  1.117
 +++ sys/sys//sysctl.h 12 Nov 2011 13:40:45 -
 @@ -189,7 +189,8 @@ struct ctlname {
  #define  KERN_CONSDEV75  /* dev_t: console terminal 
 device */
  #define  KERN_NETLIVELOCKS   76  /* int: number of network 
 livelocks */
  #define  KERN_POOL_DEBUG 77  /* int: enable pool_debug */
 -#define  KERN_MAXID  78  /* number of valid kern ids */
 +#define KERN_SCHEDSTAT   78  /* struct: sched statistics */
 +#define  KERN_MAXID  79  /* number of valid kern ids */
  
  #define  CTL_KERN_NAMES { \
   { 0, 0 }, \
 @@ -270,6 +271,7 @@ struct ctlname {
   { consdev, CTLTYPE_STRUCT }, \
   { netlivelocks, CTLTYPE_INT }, \
   { pool_debug, CTLTYPE_INT }, \
 + { schedstat, CTLTYPE_STRUCT }, \
  }
  
  /*
 Index: sys/kern//kern_sched.c
 ===
 RCS file: /cvs/src/sys/kern/kern_sched.c,v
 retrieving revision 1.24
 diff -d -u -p -w -r1.24 kern_sched.c
 --- sys/kern//kern_sched.c12 Oct 2011 18:30:09 -  1.24
 +++ sys/kern//kern_sched.c12 Nov 2011 14:41:59 -
 @@ -35,6 +35,8 @@ void sched_kthreads_create(void *);
  int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
  struct proc *sched_steal_proc(struct cpu_info *);
  
 +struct schedstat schedstat;
 +
  /*
   * To help choosing which cpu should run which process we keep track
   * of cpus which are currently idle and which cpus have processes
 @@ -301,14 +303,6 @@ again:
   return (p); 
  }
  
 -uint64_t sched_nmigrations;
 -uint64_t sched_noidle;
 -uint64_t sched_stolen;
 -
 -uint64_t sched_choose;
 -uint64_t sched_wasidle;
 -uint64_t sched_nomigrations;
 -
  struct cpu_info *
  sched_choosecpu_fork(struct proc *parent, int flags)
  {
 @@ -374,7 +368,7 @@ sched_choosecpu(struct proc *p)
   if (p-p_flag  P_CPUPEG)
   return (p-p_cpu);
  
 - sched_choose++;
 + schedstat.scs_choose++;
  
   /*
* Look at all cpus that are currently idle and have nothing queued.
 @@ -393,7 +387,7 @@ sched_choosecpu(struct proc *p)
   if (cpuset_isset(set, p-p_cpu) ||
   (p-p_cpu == curcpu()  p-p_cpu-ci_schedstate.spc_nrun == 0 
   curproc == p)) {
 - sched_wasidle++;
 + schedstat.scs_wasidle++;
   return (p-p_cpu);
   }
  
 @@ -411,9 +405,9 @@ sched_choosecpu(struct proc 

Re: Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Ted Unangst
On Fri, Nov 18, 2011, Thordur Bjornsson wrote:
 On 2011 Nov 17 (Thu) at 21:18:24 -0200 (-0200), Christiano F. Haesbaert
 wrote:
 Moving this to tech@

 Hi,

 I was studying the scheduler code after watching tedu's talk, I'd like
 to expose this statistics to userland so that I can try playing with
 cache affinity in the future:

 gimli:src: sysctl kern.schedstat
 
 kern.schedstat.nmigrations=23744
 kern.schedstat.noidle=0
 kern.schedstat.stolen=9170
 kern.schedstat.choose=834843
 kern.schedstat.wasidle=808711
 kern.schedstat.nomigrations=2388

 Opinions ?
 
 I see no point in exporting this out. They are essentially pointless
 knobs that people _will_ fiddle with without a clue.

???  They're readonly.  



Re: Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Christiano F. Haesbaert
On 17 November 2011 23:39, Ted Unangst t...@tedunangst.com wrote:
 On Fri, Nov 18, 2011, Thordur Bjornsson wrote:
 On 2011 Nov 17 (Thu) at 21:18:24 -0200 (-0200), Christiano F. Haesbaert
 wrote:
 Moving this to tech@

 Hi,

 I was studying the scheduler code after watching tedu's talk, I'd like
 to expose this statistics to userland so that I can try playing with
 cache affinity in the future:

 gimli:src: sysctl kern.schedstat

 kern.schedstat.nmigrations=23744
 kern.schedstat.noidle=0
 kern.schedstat.stolen=9170
 kern.schedstat.choose=834843
 kern.schedstat.wasidle=808711
 kern.schedstat.nomigrations=2388

 Opinions ?

 I see no point in exporting this out. They are essentially pointless
 knobs that people _will_ fiddle with without a clue.

 ???  They're readonly.


Mike just pointed out I can get those with pstat, so that solves my problem.

Still maybe there is a place for sysctl, but I'm not too strong about
it anymore.

What is the line between sysctl vs globals ?



Re: Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Christiano F. Haesbaert
On 18 November 2011 00:59, Christiano F. Haesbaert
haesba...@haesbaert.org wrote:
 On 17 November 2011 23:39, Ted Unangst t...@tedunangst.com wrote:
 On Fri, Nov 18, 2011, Thordur Bjornsson wrote:
 On 2011 Nov 17 (Thu) at 21:18:24 -0200 (-0200), Christiano F. Haesbaert
 wrote:
 Moving this to tech@

 Hi,

 I was studying the scheduler code after watching tedu's talk, I'd like
 to expose this statistics to userland so that I can try playing with
 cache affinity in the future:

 gimli:src: sysctl kern.schedstat

 kern.schedstat.nmigrations=23744
 kern.schedstat.noidle=0
 kern.schedstat.stolen=9170
 kern.schedstat.choose=834843
 kern.schedstat.wasidle=808711
 kern.schedstat.nomigrations=2388

 Opinions ?

 I see no point in exporting this out. They are essentially pointless
 knobs that people _will_ fiddle with without a clue.

 ???  They're readonly.


 Mike just pointed out I can get those with pstat, so that solves my
problem.

 Still maybe there is a place for sysctl, but I'm not too strong about
 it anymore.

 What is the line between sysctl vs globals ?


Checking again, it makes sense having the sysctl IMHO.
We have something very similar in kern.forkstat, kern.nchstats, and
the protocol stats.



Re: Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Ted Unangst
On Fri, Nov 18, 2011, Christiano F. Haesbaert wrote:

 What is the line between sysctl vs globals ?

 
 Checking again, it makes sense having the sysctl IMHO.
 We have something very similar in kern.forkstat, kern.nchstats, and
 the protocol stats.

I think those stats are useful for a sysadmin to monitor performance,
and there are actions one can take to affect them.  The scheduler is not
really controllable.

If you just want to peek at some values for debugging, pstat is the
way to go.  That's why I added it. ;)