Re: [Xen-devel] [PATCH 2/6] xen: credit2: make the cpu to runqueue map per-cpu

2017-07-21 Thread George Dunlap
On 06/23/2017 11:54 AM, Dario Faggioli wrote:
> Instead of keeping an NR_CPUS-sized array of ints
> directly inside csched2_private, use a per-cpu
> variable.
> 
> That's especially beneficial (in terms of saved
> memory) when there are more instances of Credit2 (in
> different cpupools), and it also helps fit
> csched2_private itself into CPU caches.
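
As a rough, self-contained illustration of the memory argument above (a
user-space sketch, not part of the patch; NR_CPUS=4096 is only an example
value, the real figure is a Xen build parameter):

#include <stdio.h>

#define NR_CPUS 4096  /* example only; really a Xen build parameter */

/* Old layout: the map was embedded in every csched2_private instance. */
struct old_private {
    int runq_map[NR_CPUS];
};

int main(void)
{
    unsigned int pools = 4;  /* e.g. four cpupools, each running Credit2 */

    /* Old: NR_CPUS ints paid once per scheduler instance. */
    printf("array:   %zu bytes x %u instances = %zu bytes\n",
           sizeof(struct old_private), pools,
           pools * sizeof(struct old_private));

    /* New: one int per pCPU, shared by every Credit2 instance. */
    printf("per-cpu: %zu bytes total\n", NR_CPUS * sizeof(int));
    return 0;
}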
> 
> Signed-off-by: Dario Faggioli 

Sounds good:

Acked-by: George Dunlap 

> ---
> Cc: George Dunlap 
> Cc: Anshul Makkar 
> ---
>  xen/common/sched_credit2.c |   33 ++++++++++++++++++++-------------
>  1 file changed, 20 insertions(+), 13 deletions(-)
> 
> diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
> index 10d9488..15862f2 100644
> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -383,7 +383,6 @@ struct csched2_private {
>  
>      struct list_head sdom; /* Used mostly for dump keyhandler. */
>  
> -    int runq_map[NR_CPUS];
>      cpumask_t active_queues; /* Queues which may have active cpus */
>      struct csched2_runqueue_data *rqd;
>  
> @@ -393,6 +392,14 @@ struct csched2_private {
>  };
>  
>  /*
> + * Physical CPU
> + *
> + * The only per-pCPU information we need to maintain is of which runqueue
> + * each CPU is part of.
> + */
> +static DEFINE_PER_CPU(int, runq_map);
> +
> +/*
>   * Virtual CPU
>   */
>  struct csched2_vcpu {
> @@ -448,16 +455,16 @@ static inline struct csched2_dom *csched2_dom(const struct domain *d)
>  }
>  
>  /* CPU to runq_id macro */
> -static inline int c2r(const struct scheduler *ops, unsigned int cpu)
> +static inline int c2r(unsigned int cpu)
>  {
> -    return csched2_priv(ops)->runq_map[(cpu)];
> +    return per_cpu(runq_map, cpu);
>  }
>  
>  /* CPU to runqueue struct macro */
>  static inline struct csched2_runqueue_data *c2rqd(const struct scheduler *ops,
>                                                    unsigned int cpu)
>  {
> -    return &csched2_priv(ops)->rqd[c2r(ops, cpu)];
> +    return &csched2_priv(ops)->rqd[c2r(cpu)];
>  }
>  
>  /*
> @@ -1082,7 +1089,7 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
>      ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
>  
>      ASSERT(!vcpu_on_runq(svc));
> -    ASSERT(c2r(ops, cpu) == c2r(ops, svc->vcpu->processor));
> +    ASSERT(c2r(cpu) == c2r(svc->vcpu->processor));
>  
>      ASSERT(&svc->rqd->runq == runq);
>      ASSERT(!is_idle_vcpu(svc->vcpu));
> @@ -1733,7 +1740,7 @@ csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
>      if ( min_rqi == -1 )
>      {
>          new_cpu = get_fallback_cpu(svc);
> -        min_rqi = c2r(ops, new_cpu);
> +        min_rqi = c2r(new_cpu);
>          min_avgload = prv->rqd[min_rqi].b_avgload;
>          goto out_up;
>      }
> @@ -2622,7 +2629,7 @@ csched2_schedule(
>              unsigned tasklet:8, idle:8, smt_idle:8, tickled:8;
>          } d;
>          d.cpu = cpu;
> -        d.rq_id = c2r(ops, cpu);
> +        d.rq_id = c2r(cpu);
>          d.tasklet = tasklet_work_scheduled;
>          d.idle = is_idle_vcpu(current);
>          d.smt_idle = cpumask_test_cpu(cpu, &rqd->smt_idle);
> @@ -2783,7 +2790,7 @@ dump_pcpu(const struct scheduler *ops, int cpu)
>  #define cpustr keyhandler_scratch
>  
>      cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_mask, cpu));
> -    printk("CPU[%02d] runq=%d, sibling=%s, ", cpu, c2r(ops, cpu), cpustr);
> +    printk("CPU[%02d] runq=%d, sibling=%s, ", cpu, c2r(cpu), cpustr);
>      cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_mask, cpu));
>      printk("core=%s\n", cpustr);
>  
> @@ -2930,7 +2937,7 @@ init_pdata(struct csched2_private *prv, unsigned int cpu)
>      }
>  
>      /* Set the runqueue map */
> -    prv->runq_map[cpu] = rqi;
> +    per_cpu(runq_map, cpu) = rqi;
>  
>      __cpumask_set_cpu(cpu, &rqd->idle);
>      __cpumask_set_cpu(cpu, &rqd->active);
> @@ -3034,7 +3041,7 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
>      ASSERT(!pcpu && cpumask_test_cpu(cpu, &prv->initialized));
>  
>      /* Find the old runqueue and remove this cpu from it */
> -    rqi = prv->runq_map[cpu];
> +    rqi = per_cpu(runq_map, cpu);
>  
>      rqd = prv->rqd + rqi;
>  
> @@ -3055,6 +3062,8 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
>      else if ( rqd->pick_bias == cpu )
>          rqd->pick_bias = cpumask_first(&rqd->active);
>  
> +    per_cpu(runq_map, cpu) = -1;
> +
>      spin_unlock(&rqd->lock);
>  
>      __cpumask_clear_cpu(cpu, &prv->initialized);
> @@ -3121,10 +3130,8 @@ csched2_init(struct scheduler *ops)
>          return -ENOMEM;
>      }
>      for ( i = 0; i < nr_cpu_ids; i++ )
> -    {
> -        prv->runq_map[i] = -1;
>          prv->rqd[i].id = -1;
> -    }
> +
>      /* initialize ratelimit */
>      prv->ratelimit_us = sched_ratelimit_us;
>  
> 
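
For anyone not familiar with Xen's per-cpu machinery, here is a tiny
user-space emulation of the access pattern above (the names mirror the
patch, but the per_cpu() stand-in is mine: the real DEFINE_PER_CPU /
per_cpu resolve to a per-processor data area, not a flat array):

#include <stdio.h>

#define MAX_CPUS 8                       /* stand-in for the pCPU count */

static int runq_map[MAX_CPUS];           /* stand-in for DEFINE_PER_CPU(int, runq_map) */
#define per_cpu(var, cpu) ((var)[(cpu)]) /* stand-in for Xen's per_cpu() */

/* Mirrors the patched c2r(): no scheduler ops pointer is needed any more. */
static int c2r(unsigned int cpu)
{
    return per_cpu(runq_map, cpu);
}

int main(void)
{
    per_cpu(runq_map, 3) = 1;   /* as in init_pdata(): CPU 3 joins runqueue 1 */
    printf("CPU3 is on runq %d\n", c2r(3));
    per_cpu(runq_map, 3) = -1;  /* as in csched2_deinit_pdata() on removal */
    return 0;
}

Note how the per-cpu variable is also what lets c2r() drop its ops
argument: the map no longer lives inside any particular scheduler
instance.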


[Xen-devel] [PATCH 2/6] xen: credit2: make the cpu to runqueue map per-cpu

2017-06-23 Thread Dario Faggioli
Instead of keeping an NR_CPUS-sized array of ints
directly inside csched2_private, use a per-cpu
variable.

That's especially beneficial (in terms of saved
memory) when there are more instances of Credit2 (in
different cpupools), and it also helps fit
csched2_private itself into CPU caches.

Signed-off-by: Dario Faggioli 
---
Cc: George Dunlap 
Cc: Anshul Makkar 
---
 xen/common/sched_credit2.c |   33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 10d9488..15862f2 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -383,7 +383,6 @@ struct csched2_private {
 
     struct list_head sdom; /* Used mostly for dump keyhandler. */
 
-    int runq_map[NR_CPUS];
     cpumask_t active_queues; /* Queues which may have active cpus */
     struct csched2_runqueue_data *rqd;
 
@@ -393,6 +392,14 @@ struct csched2_private {
 };
 
 /*
+ * Physical CPU
+ *
+ * The only per-pCPU information we need to maintain is of which runqueue
+ * each CPU is part of.
+ */
+static DEFINE_PER_CPU(int, runq_map);
+
+/*
  * Virtual CPU
  */
 struct csched2_vcpu {
@@ -448,16 +455,16 @@ static inline struct csched2_dom *csched2_dom(const struct domain *d)
 }
 
 /* CPU to runq_id macro */
-static inline int c2r(const struct scheduler *ops, unsigned int cpu)
+static inline int c2r(unsigned int cpu)
 {
-    return csched2_priv(ops)->runq_map[(cpu)];
+    return per_cpu(runq_map, cpu);
 }
 
 /* CPU to runqueue struct macro */
 static inline struct csched2_runqueue_data *c2rqd(const struct scheduler *ops,
                                                   unsigned int cpu)
 {
-    return &csched2_priv(ops)->rqd[c2r(ops, cpu)];
+    return &csched2_priv(ops)->rqd[c2r(cpu)];
 }
 
 /*
@@ -1082,7 +1089,7 @@ runq_insert(const struct scheduler *ops, struct csched2_vcpu *svc)
     ASSERT(spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock));
 
     ASSERT(!vcpu_on_runq(svc));
-    ASSERT(c2r(ops, cpu) == c2r(ops, svc->vcpu->processor));
+    ASSERT(c2r(cpu) == c2r(svc->vcpu->processor));
 
     ASSERT(&svc->rqd->runq == runq);
     ASSERT(!is_idle_vcpu(svc->vcpu));
@@ -1733,7 +1740,7 @@ csched2_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
     if ( min_rqi == -1 )
     {
         new_cpu = get_fallback_cpu(svc);
-        min_rqi = c2r(ops, new_cpu);
+        min_rqi = c2r(new_cpu);
         min_avgload = prv->rqd[min_rqi].b_avgload;
         goto out_up;
     }
@@ -2622,7 +2629,7 @@ csched2_schedule(
             unsigned tasklet:8, idle:8, smt_idle:8, tickled:8;
         } d;
         d.cpu = cpu;
-        d.rq_id = c2r(ops, cpu);
+        d.rq_id = c2r(cpu);
         d.tasklet = tasklet_work_scheduled;
         d.idle = is_idle_vcpu(current);
         d.smt_idle = cpumask_test_cpu(cpu, &rqd->smt_idle);
@@ -2783,7 +2790,7 @@ dump_pcpu(const struct scheduler *ops, int cpu)
 #define cpustr keyhandler_scratch
 
     cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_mask, cpu));
-    printk("CPU[%02d] runq=%d, sibling=%s, ", cpu, c2r(ops, cpu), cpustr);
+    printk("CPU[%02d] runq=%d, sibling=%s, ", cpu, c2r(cpu), cpustr);
     cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_core_mask, cpu));
     printk("core=%s\n", cpustr);
 
@@ -2930,7 +2937,7 @@ init_pdata(struct csched2_private *prv, unsigned int cpu)
     }
 
     /* Set the runqueue map */
-    prv->runq_map[cpu] = rqi;
+    per_cpu(runq_map, cpu) = rqi;
 
     __cpumask_set_cpu(cpu, &rqd->idle);
     __cpumask_set_cpu(cpu, &rqd->active);
@@ -3034,7 +3041,7 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     ASSERT(!pcpu && cpumask_test_cpu(cpu, &prv->initialized));
 
     /* Find the old runqueue and remove this cpu from it */
-    rqi = prv->runq_map[cpu];
+    rqi = per_cpu(runq_map, cpu);
 
     rqd = prv->rqd + rqi;
 
@@ -3055,6 +3062,8 @@ csched2_deinit_pdata(const struct scheduler *ops, void *pcpu, int cpu)
     else if ( rqd->pick_bias == cpu )
         rqd->pick_bias = cpumask_first(&rqd->active);
 
+    per_cpu(runq_map, cpu) = -1;
+
     spin_unlock(&rqd->lock);
 
     __cpumask_clear_cpu(cpu, &prv->initialized);
@@ -3121,10 +3130,8 @@ csched2_init(struct scheduler *ops)
         return -ENOMEM;
     }
     for ( i = 0; i < nr_cpu_ids; i++ )
-    {
-        prv->runq_map[i] = -1;
         prv->rqd[i].id = -1;
-    }
+
     /* initialize ratelimit */
     prv->ratelimit_us = sched_ratelimit_us;
 


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel