Re: [PATCH] sched/deadline: fix dl bandwidth of root domain overflow after dl task dead

2015-09-01 Thread Juri Lelli
Hi,

On 30/08/15 12:25, Wanpeng Li wrote:
> On 8/10/15 10:10 PM, Juri Lelli wrote:
>> On 06/08/15 09:39, Wanpeng Li wrote:
>>> Hi Juri,
>>>
>> Hi,
>>
>>> 2015-05-06 16:14 GMT+08:00 Juri Lelli <juri.le...@arm.com>:
>>>
>>>  Hi Wanpeng,
>>>
>>>  I finally got to review this, sorry about the huge delay.
>>>
>>>  On 07/04/2015 04:36, Wanpeng Li wrote:
>>>  > The total used dl bandwidth of each root domain will be reset to 0
>>>  > after cpu hotplug, when sched domains are rebuilt, since the call
>>>  > path is:
>>>  >
>>>  > _cpu_down
>>>  >   cpuset_cpu_inactive()
>>>  >     cpuset_update_active_cpus()
>>>  >       partition_sched_domains()
>>>  >         build_sched_domains()
>>>  >           init_rootdomain()
>>>  >             init_dl_bw()
>>>  >
>>>  > The bandwidth occupied by a dl task will be released when the task
>>>  > dies: it is subtracted from the total used dl bandwidth of its root
>>>  > domain. However, a bandwidth overflow occurs, since the total used
>>>  > dl bandwidth is now 0.
>>>  >
>>>
>>>  Right, that's a bug.
>>>
>>>  > This patch fixes it by attaching the bandwidth occupied by a dl task
>>>  > to the new root domain when the task migrates due to cpu hotplug, and
>>>  > by attaching the used dl bandwidth of all dl tasks to the new root
>>>  > domain when sched domains are rebuilt.
>>>  >
>>>
>>>  But I think this fix still has a couple of problems:
>>>
>>>   - what happens if a DL task is simply sleeping when domains are
>>>     reconfigured?
>>>
>>>   - def_root_domain now has multiple accounting problems, as you do
>>>     this thing even when a cpu is moved there in the cpuoff path
>>>
>>>  Also, runqueue (and throttling) information is dynamic, while we
>>>  are trying to fix a static problem. It's probably not a good idea
>>>  mixing them.
>>>
>>>  I'm not sure how (I need more time to think it through), but can
>>>  we maybe fix this using cpuset information?
>>>
>>>
>>> Any ideas?
>>>
>> Yes, actually. I might have a different fix, but I'd like to play with
>> it a bit more as it is a bit too intrusive. Let me see if I can come
>> up with something that I can share.
> 
> Ping Peter, Juri, any detailed ideas to help me post another version of my
> patch? ;-)
> 

Let me see if I'm able to post my version of the fix before
end of this week ;).

Thanks!

- Juri

> Regards,
> Wanpeng Li
> 
>>
>> Thanks,
>>
>> - Juri
>>
>>> Regards,
>>> Wanpeng Li
>>>   
>>>
>>>
>>>  Thanks,
>>>
>>>  - Juri
>>>
>>>  > Signed-off-by: Wanpeng Li <wanpeng...@linux.intel.com>
>>>  > ---
>>>  >  kernel/sched/core.c     |  1 +
>>>  >  kernel/sched/deadline.c | 25 +++++++++++++++++++++++++
>>>  >  kernel/sched/sched.h    |  1 +
>>>  >  3 files changed, 27 insertions(+)
>>>  >
>>>  > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>>>  > index 28b0d75..c940999 100644
>>>  > --- a/kernel/sched/core.c
>>>  > +++ b/kernel/sched/core.c
>>>  > @@ -5586,6 +5586,7 @@ static void rq_attach_root(struct rq *rq,
>>>  struct root_domain *rd)
>>>  >   rq->rd = rd;
>>>  >
>>>  >   cpumask_set_cpu(rq->cpu, rd->span);
>>>  > + attach_dl_bw(rq);
>>>  >   if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
>>>  >   set_rq_online(rq);
>>>  >
>>>  > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>>>  > index 5e95145..62680d7 100644
>>>  > --- a/kernel/sched/deadline.c
>>>  > +++ b/kernel/sched/deadline.c
>>>  > @@ -224,6 +224,7 @@ static void dl_task_offline_migration(struct
>>>  rq *rq, struct task_struct *p)
>>>  >  {
>>>  >   struct rq *later_rq = NULL;
>>>  >   bool fallback = false;
>>>  > + struct dl_bw *dl_b;
>>>  >
>>>  >   later_rq = find_lock_later_rq(p, rq);
>>>  >
>>>  > @@ -258,6 +259,11 @@ static void dl_task_offline_migration(struct
>>>  rq *rq, struct task_struct *p)
>>>  >   set_task_cpu(p, later_rq->cpu);
>>>  >   activate_task(later_rq, p, ENQUEUE_REPLENISH);
>>>  >
>>>  > + dl_b = dl_bw_of(later_rq->cpu);
>>>  > + raw_spin_lock(&dl_b->lock);
>>>  > + __dl_add(dl_b, p->dl.dl_bw);
>>>  > + raw_spin_unlock(&dl_b->lock);
>>>  > +
>>>  >   if (!fallback)
>>>  >   resched_curr(later_rq);
>>>  >
>>>  > @@ -1776,6 +1782,25 @@ static void prio_changed_dl(struct rq *rq,
>>>  struct task_struct *p,
>>>  >   switched_to_dl(rq, p);
>>>  >  }
>>>  >
>>>  > +void attach_dl_bw(struct rq *rq)
>>>  > +{
>>>  > + struct rb_node *next_node = rq->dl.rb_leftmost;
>>>  > + struct sched_dl_entity *dl_se;
>>>  > + struct dl_bw *dl_b;
>>>  > +
>>>  > + dl_b = dl_bw_of(rq->cpu);
>>>  > + raw_spin_lock(&dl_b->lock);
>>>  > 

Re: [PATCH] sched/deadline: fix dl bandwidth of root domain overflow after dl task dead

2015-08-30 Thread Wanpeng Li

On 8/10/15 10:10 PM, Juri Lelli wrote:

On 06/08/15 09:39, Wanpeng Li wrote:

Hi Juri,


Hi,


2015-05-06 16:14 GMT+08:00 Juri Lelli <juri.le...@arm.com>:

 Hi Wanpeng,

 I finally got to review this, sorry about the huge delay.

 On 07/04/2015 04:36, Wanpeng Li wrote:
 > The total used dl bandwidth of each root domain will be reset to 0 after
 > cpu hotplug, when sched domains are rebuilt, since the call path is:
 >
 > _cpu_down
 >   cpuset_cpu_inactive()
 >     cpuset_update_active_cpus()
 >       partition_sched_domains()
 >         build_sched_domains()
 >           init_rootdomain()
 >             init_dl_bw()
 >
 > The bandwidth occupied by a dl task will be released when the task dies:
 > it is subtracted from the total used dl bandwidth of its root domain.
 > However, a bandwidth overflow occurs, since the total used dl bandwidth
 > is now 0.
 >

 Right, that's a bug.

 > This patch fixes it by attaching the bandwidth occupied by a dl task to
 > the new root domain when the task migrates due to cpu hotplug, and by
 > attaching the used dl bandwidth of all dl tasks to the new root domain
 > when sched domains are rebuilt.
 >

 But I think this fix still has a couple of problems:

  - what happens if a DL task is simply sleeping when domains are
    reconfigured?

  - def_root_domain now has multiple accounting problems, as you do
    this thing even when a cpu is moved there in the cpuoff path

 Also, runqueue (and throttling) information is dynamic, while we
 are trying to fix a static problem. It's probably not a good idea
 mixing them.

 I'm not sure how (I need more time to think it through), but can
 we maybe fix this using cpuset information?


Any ideas?


Yes, actually. I might have a different fix, but I'd like to play with
it a bit more as it is a bit too intrusive. Let me see if I can come
up with something that I can share.


Ping Peter, Juri, any detailed ideas to help me post another version of my
patch? ;-)


Regards,
Wanpeng Li



Thanks,

- Juri


Regards,
Wanpeng Li
  



 Thanks,

 - Juri

 > Signed-off-by: Wanpeng Li <wanpeng...@linux.intel.com>
 > ---
 >  kernel/sched/core.c     |  1 +
 >  kernel/sched/deadline.c | 25 +++++++++++++++++++++++++
 >  kernel/sched/sched.h    |  1 +
 >  3 files changed, 27 insertions(+)
 >
 > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
 > index 28b0d75..c940999 100644
 > --- a/kernel/sched/core.c
 > +++ b/kernel/sched/core.c
 > @@ -5586,6 +5586,7 @@ static void rq_attach_root(struct rq *rq,
 struct root_domain *rd)
 >   rq->rd = rd;
 >
 >   cpumask_set_cpu(rq->cpu, rd->span);
 > + attach_dl_bw(rq);
 >   if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
 >   set_rq_online(rq);
 >
 > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
 > index 5e95145..62680d7 100644
 > --- a/kernel/sched/deadline.c
 > +++ b/kernel/sched/deadline.c
 > @@ -224,6 +224,7 @@ static void dl_task_offline_migration(struct
 rq *rq, struct task_struct *p)
 >  {
 >   struct rq *later_rq = NULL;
 >   bool fallback = false;
 > + struct dl_bw *dl_b;
 >
 >   later_rq = find_lock_later_rq(p, rq);
 >
 > @@ -258,6 +259,11 @@ static void dl_task_offline_migration(struct
 rq *rq, struct task_struct *p)
 >   set_task_cpu(p, later_rq->cpu);
 >   activate_task(later_rq, p, ENQUEUE_REPLENISH);
 >
 > + dl_b = dl_bw_of(later_rq->cpu);
 > + raw_spin_lock(&dl_b->lock);
 > + __dl_add(dl_b, p->dl.dl_bw);
 > + raw_spin_unlock(&dl_b->lock);
 > +
 >   if (!fallback)
 >   resched_curr(later_rq);
 >
 > @@ -1776,6 +1782,25 @@ static void prio_changed_dl(struct rq *rq,
 struct task_struct *p,
 >   switched_to_dl(rq, p);
 >  }
 >
 > +void attach_dl_bw(struct rq *rq)
 > +{
 > + struct rb_node *next_node = rq->dl.rb_leftmost;
 > + struct sched_dl_entity *dl_se;
 > + struct dl_bw *dl_b;
 > +
 > + dl_b = dl_bw_of(rq->cpu);
 > + raw_spin_lock(&dl_b->lock);
 > +next_node:
 > + if (next_node) {
 > + dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
 > + __dl_add(dl_b, dl_se->dl_bw);
 > + next_node = rb_next(next_node);
 > +
 > + goto next_node;
 > + }
 > + raw_spin_unlock(&dl_b->lock);
 > +}
 > +
 >  const struct sched_class dl_sched_class = {
 >   .next   = &rt_sched_class,
 >   .enqueue_task   = enqueue_task_dl,
 > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
 > index e0e1299..a7b1a59 100644
 > --- a/kernel/sched/sched.h
 > +++ 

Re: [PATCH] sched/deadline: fix dl bandwidth of root domain overflow after dl task dead

2015-08-10 Thread Wanpeng Li



On 8/10/15 10:10 PM, Juri Lelli wrote:

On 06/08/15 09:39, Wanpeng Li wrote:

Hi Juri,


Hi,


2015-05-06 16:14 GMT+08:00 Juri Lelli <juri.le...@arm.com>:

 Hi Wanpeng,

 I finally got to review this, sorry about the huge delay.

 On 07/04/2015 04:36, Wanpeng Li wrote:
 > The total used dl bandwidth of each root domain will be reset to 0 after
 > cpu hotplug, when sched domains are rebuilt, since the call path is:
 >
 > _cpu_down
 >   cpuset_cpu_inactive()
 >     cpuset_update_active_cpus()
 >       partition_sched_domains()
 >         build_sched_domains()
 >           init_rootdomain()
 >             init_dl_bw()
 >
 > The bandwidth occupied by a dl task will be released when the task dies:
 > it is subtracted from the total used dl bandwidth of its root domain.
 > However, a bandwidth overflow occurs, since the total used dl bandwidth
 > is now 0.
 >

 Right, that's a bug.

 > This patch fixes it by attaching the bandwidth occupied by a dl task to
 > the new root domain when the task migrates due to cpu hotplug, and by
 > attaching the used dl bandwidth of all dl tasks to the new root domain
 > when sched domains are rebuilt.
 >

 But I think this fix still has a couple of problems:

  - what happens if a DL task is simply sleeping when domains are
    reconfigured?

  - def_root_domain now has multiple accounting problems, as you do
    this thing even when a cpu is moved there in the cpuoff path

 Also, runqueue (and throttling) information is dynamic, while we
 are trying to fix a static problem. It's probably not a good idea
 mixing them.

 I'm not sure how (I need more time to think it through), but can
 we maybe fix this using cpuset information?


Any ideas?


Yes, actually. I might have a different fix, but I'd like to play with
it a bit more as it is a bit too intrusive. Let me see if I can come
up with something that I can share.


Cool, looking forward to your patches. :)

Regards,
Wanpeng Li



Thanks,

- Juri


Regards,
Wanpeng Li
  



 Thanks,

 - Juri

 > Signed-off-by: Wanpeng Li <wanpeng...@linux.intel.com>
 > ---
 >  kernel/sched/core.c     |  1 +
 >  kernel/sched/deadline.c | 25 +++++++++++++++++++++++++
 >  kernel/sched/sched.h    |  1 +
 >  3 files changed, 27 insertions(+)
 >
 > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
 > index 28b0d75..c940999 100644
 > --- a/kernel/sched/core.c
 > +++ b/kernel/sched/core.c
 > @@ -5586,6 +5586,7 @@ static void rq_attach_root(struct rq *rq,
 struct root_domain *rd)
 >   rq->rd = rd;
 >
 >   cpumask_set_cpu(rq->cpu, rd->span);
 > + attach_dl_bw(rq);
 >   if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
 >   set_rq_online(rq);
 >
 > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
 > index 5e95145..62680d7 100644
 > --- a/kernel/sched/deadline.c
 > +++ b/kernel/sched/deadline.c
 > @@ -224,6 +224,7 @@ static void dl_task_offline_migration(struct
 rq *rq, struct task_struct *p)
 >  {
 >   struct rq *later_rq = NULL;
 >   bool fallback = false;
 > + struct dl_bw *dl_b;
 >
 >   later_rq = find_lock_later_rq(p, rq);
 >
 > @@ -258,6 +259,11 @@ static void dl_task_offline_migration(struct
 rq *rq, struct task_struct *p)
 >   set_task_cpu(p, later_rq->cpu);
 >   activate_task(later_rq, p, ENQUEUE_REPLENISH);
 >
 > + dl_b = dl_bw_of(later_rq->cpu);
 > + raw_spin_lock(&dl_b->lock);
 > + __dl_add(dl_b, p->dl.dl_bw);
 > + raw_spin_unlock(&dl_b->lock);
 > +
 >   if (!fallback)
 >   resched_curr(later_rq);
 >
 > @@ -1776,6 +1782,25 @@ static void prio_changed_dl(struct rq *rq,
 struct task_struct *p,
 >   switched_to_dl(rq, p);
 >  }
 >
 > +void attach_dl_bw(struct rq *rq)
 > +{
 > + struct rb_node *next_node = rq->dl.rb_leftmost;
 > + struct sched_dl_entity *dl_se;
 > + struct dl_bw *dl_b;
 > +
 > + dl_b = dl_bw_of(rq->cpu);
 > + raw_spin_lock(&dl_b->lock);
 > +next_node:
 > + if (next_node) {
 > + dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
 > + __dl_add(dl_b, dl_se->dl_bw);
 > + next_node = rb_next(next_node);
 > +
 > + goto next_node;
 > + }
 > + raw_spin_unlock(&dl_b->lock);
 > +}
 > +
 >  const struct sched_class dl_sched_class = {
 >   .next   = &rt_sched_class,
 >   .enqueue_task   = enqueue_task_dl,
 > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
 > index e0e1299..a7b1a59 100644
 > --- a/kernel/sched/sched.h
 > +++ b/kernel/sched/sched.h
 > @@ -1676,6 +1676,7 @@ 

Re: [PATCH] sched/deadline: fix dl bandwidth of root domain overflow after dl task dead

2015-08-10 Thread Juri Lelli
On 06/08/15 09:39, Wanpeng Li wrote:
> Hi Juri,
>

Hi,

> 2015-05-06 16:14 GMT+08:00 Juri Lelli <juri.le...@arm.com>:
> 
> Hi Wanpeng,
> 
> I finally got to review this, sorry about the huge delay.
> 
> On 07/04/2015 04:36, Wanpeng Li wrote:
> > The total used dl bandwidth of each root domain will be reset to 0 after
> > cpu hotplug, when sched domains are rebuilt, since the call path is:
> >
> > _cpu_down
> >   cpuset_cpu_inactive()
> >     cpuset_update_active_cpus()
> >       partition_sched_domains()
> >         build_sched_domains()
> >           init_rootdomain()
> >             init_dl_bw()
> >
> > The bandwidth occupied by a dl task will be released when the task dies:
> > it is subtracted from the total used dl bandwidth of its root domain.
> > However, a bandwidth overflow occurs, since the total used dl bandwidth
> > is now 0.
> >
> 
> Right, that's a bug.
> 
> > This patch fixes it by attaching the bandwidth occupied by a dl task to
> > the new root domain when the task migrates due to cpu hotplug, and by
> > attaching the used dl bandwidth of all dl tasks to the new root domain
> > when sched domains are rebuilt.
> >
> 
> But I think this fix still has a couple of problems:
> 
>  - what happens if a DL task is simply sleeping when domains are
>    reconfigured?
> 
>  - def_root_domain now has multiple accounting problems, as you do
>    this thing even when a cpu is moved there in the cpuoff path
> 
> Also, runqueue (and throttling) information is dynamic, while we
> are trying to fix a static problem. It's probably not a good idea
> mixing them.
> 
> I'm not sure how (I need more time to think it through), but can
> we maybe fix this using cpuset information?
> 
> 
> Any ideas?
> 

Yes, actually. I might have a different fix, but I'd like to play with
it a bit more as it is a bit too intrusive. Let me see if I can come
up with something that I can share.

Thanks,

- Juri

> Regards,
> Wanpeng Li
>  
> 
> 
> Thanks,
> 
> - Juri
> 
> > Signed-off-by: Wanpeng Li <wanpeng...@linux.intel.com>
> > ---
> >  kernel/sched/core.c     |  1 +
> >  kernel/sched/deadline.c | 25 +++++++++++++++++++++++++
> >  kernel/sched/sched.h    |  1 +
> >  3 files changed, 27 insertions(+)
> >
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 28b0d75..c940999 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -5586,6 +5586,7 @@ static void rq_attach_root(struct rq *rq,
> struct root_domain *rd)
> >   rq->rd = rd;
> >
> >   cpumask_set_cpu(rq->cpu, rd->span);
> > + attach_dl_bw(rq);
> >   if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
> >   set_rq_online(rq);
> >
> > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> > index 5e95145..62680d7 100644
> > --- a/kernel/sched/deadline.c
> > +++ b/kernel/sched/deadline.c
> > @@ -224,6 +224,7 @@ static void dl_task_offline_migration(struct
> rq *rq, struct task_struct *p)
> >  {
> >   struct rq *later_rq = NULL;
> >   bool fallback = false;
> > + struct dl_bw *dl_b;
> >
> >   later_rq = find_lock_later_rq(p, rq);
> >
> > @@ -258,6 +259,11 @@ static void dl_task_offline_migration(struct
> rq *rq, struct task_struct *p)
> >   set_task_cpu(p, later_rq->cpu);
> >   activate_task(later_rq, p, ENQUEUE_REPLENISH);
> >
> > + dl_b = dl_bw_of(later_rq->cpu);
> > + raw_spin_lock(&dl_b->lock);
> > + __dl_add(dl_b, p->dl.dl_bw);
> > + raw_spin_unlock(&dl_b->lock);
> > +
> >   if (!fallback)
> >   resched_curr(later_rq);
> >
> > @@ -1776,6 +1782,25 @@ static void prio_changed_dl(struct rq *rq,
> struct task_struct *p,
> >   switched_to_dl(rq, p);
> >  }
> >
> > +void attach_dl_bw(struct rq *rq)
> > +{
> > + struct rb_node *next_node = rq->dl.rb_leftmost;
> > + struct sched_dl_entity *dl_se;
> > + struct dl_bw *dl_b;
> > +
> > + dl_b = dl_bw_of(rq->cpu);
> > + raw_spin_lock(&dl_b->lock);
> > +next_node:
> > + if (next_node) {
> > + dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
> > + __dl_add(dl_b, dl_se->dl_bw);
> > + next_node = rb_next(next_node);
> > +
> > + goto next_node;
> > + }
> > + raw_spin_unlock(&dl_b->lock);
> > +}
> > +
> >  const struct sched_class dl_sched_class = {
> >   .next   = &rt_sched_class,
> >   .enqueue_task   = enqueue_task_dl,
> > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> > index e0e1299..a7b1a59 100644
> > --- a/kernel/sched/sched.h
> > +++ 

Re: [PATCH] sched/deadline: fix dl bandwidth of root domain overflow after dl task dead

2015-05-06 Thread Juri Lelli
Hi Wanpeng,

I finally got to review this, sorry about the huge delay.

On 07/04/2015 04:36, Wanpeng Li wrote:
> The total used dl bandwidth of each root domain will be reset to 0 after
> cpu hotplug, when sched domains are rebuilt, since the call path is:
> 
> _cpu_down
>   cpuset_cpu_inactive()
>     cpuset_update_active_cpus()
>       partition_sched_domains()
>         build_sched_domains()
>           init_rootdomain()
>             init_dl_bw()
> 
> The bandwidth occupied by a dl task will be released when the task dies:
> it is subtracted from the total used dl bandwidth of its root domain.
> However, a bandwidth overflow occurs, since the total used dl bandwidth
> is now 0.
> 

Right, that's a bug.
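
To make the failure mode concrete, here is a minimal sketch with made-up
numbers (an illustration, not the kernel code itself; total_bw is the u64
counter in struct dl_bw, and init_dl_bw() on the freshly built root domain
has just left it at zero):

	u64 total_bw = 0;	/* reset by init_dl_bw() on the new rd */
	u64 task_bw  = 104857;	/* bandwidth the dying dl task still holds */

	total_bw -= task_bw;	/* wraps to a huge value: the "overflow" */

Any later subtraction for a task the new root domain never accounted for
wraps the same way.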

> This patch fixes it by attaching the bandwidth occupied by a dl task to
> the new root domain when the task migrates due to cpu hotplug, and by
> attaching the used dl bandwidth of all dl tasks to the new root domain
> when sched domains are rebuilt.
> 

But I think this fix still has a couple of problems (a sketch of both
follows the list):

 - what happens if a DL task is simply sleeping when domains are
   reconfigured?

 - def_root_domain now has multiple accounting problems, as you do
   this thing even when a cpu is moved there in the cpuoff path
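
The first problem can be seen in the proposed attach_dl_bw() itself: it
only walks the dl runqueue's rb-tree, i.e. the tasks that happen to be
enqueued at that instant. Here is the same walk written as a plain loop
and annotated (a sketch that mirrors the patch; dl_b and rq are as in
attach_dl_bw() there):

	struct rb_node *node;
	struct sched_dl_entity *dl_se;

	/* Only tasks currently enqueued on rq->dl are visited... */
	for (node = rq->dl.rb_leftmost; node; node = rb_next(node)) {
		dl_se = rb_entry(node, struct sched_dl_entity, rb_node);
		__dl_add(dl_b, dl_se->dl_bw);
	}
	/*
	 * ...so a dl task that is blocked while domains are rebuilt
	 * contributes nothing here, yet its bandwidth is still
	 * subtracted from this root domain when it later dies.
	 */

The second problem follows from where the hook sits: rq_attach_root()
also runs when a cpu is parked on def_root_domain in the hotplug path,
so def_root_domain's total_bw gets inflated on every pass through it.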

Also, runqueue (and throttling) information is dynamic, while we
are trying to fix a static problem. It's probably not a good idea
mixing them.

I'm not sure how (I need more time to think it through), but can
we maybe fix this using cpuset information?
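
For illustration, one shape such a fix could take (purely a sketch of the
idea, built around a hypothetical helper, not code from any posted patch)
is to re-derive the static bandwidth from task state whenever a root
domain is (re)built, so that sleeping tasks are counted too:

	/*
	 * Hypothetical helper, illustrative only: recompute a root
	 * domain's used dl bandwidth from task state rather than from
	 * runqueue state. A real version would need tasklist
	 * protection (e.g. rcu_read_lock()) and should run once per
	 * root domain, not once per rq.
	 */
	static void dl_bw_recompute(struct dl_bw *dl_b,
				    const struct cpumask *span)
	{
		struct task_struct *g, *p;

		raw_spin_lock(&dl_b->lock);
		dl_b->total_bw = 0;
		for_each_process_thread(g, p) {
			if (dl_task(p) && cpumask_test_cpu(task_cpu(p), span))
				__dl_add(dl_b, p->dl.dl_bw);
		}
		raw_spin_unlock(&dl_b->lock);
	}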

Thanks,

- Juri

> Signed-off-by: Wanpeng Li <wanpeng...@linux.intel.com>
> ---
>  kernel/sched/core.c     |  1 +
>  kernel/sched/deadline.c | 25 +++++++++++++++++++++++++
>  kernel/sched/sched.h    |  1 +
>  3 files changed, 27 insertions(+)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 28b0d75..c940999 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5586,6 +5586,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
>   rq->rd = rd;
>  
>   cpumask_set_cpu(rq->cpu, rd->span);
> + attach_dl_bw(rq);
>   if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
>   set_rq_online(rq);
>  
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 5e95145..62680d7 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -224,6 +224,7 @@ static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
>  {
>   struct rq *later_rq = NULL;
>   bool fallback = false;
> + struct dl_bw *dl_b;
>  
>   later_rq = find_lock_later_rq(p, rq);
>  
> @@ -258,6 +259,11 @@ static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
>   set_task_cpu(p, later_rq->cpu);
>   activate_task(later_rq, p, ENQUEUE_REPLENISH);
>  
> + dl_b = dl_bw_of(later_rq->cpu);
> + raw_spin_lock(&dl_b->lock);
> + __dl_add(dl_b, p->dl.dl_bw);
> + raw_spin_unlock(&dl_b->lock);
> +
>   if (!fallback)
>   resched_curr(later_rq);
>  
> @@ -1776,6 +1782,25 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
>   switched_to_dl(rq, p);
>  }
>  
> +void attach_dl_bw(struct rq *rq)
> +{
> + struct rb_node *next_node = rq->dl.rb_leftmost;
> + struct sched_dl_entity *dl_se;
> + struct dl_bw *dl_b;
> +
> + dl_b = dl_bw_of(rq->cpu);
> + raw_spin_lock(&dl_b->lock);
> +next_node:
> + if (next_node) {
> + dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
> + __dl_add(dl_b, dl_se->dl_bw);
> + next_node = rb_next(next_node);
> +
> + goto next_node;
> + }
> + raw_spin_unlock(&dl_b->lock);
> +}
> +
>  const struct sched_class dl_sched_class = {
>   .next   = &rt_sched_class,
>   .enqueue_task   = enqueue_task_dl,
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e0e1299..a7b1a59 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1676,6 +1676,7 @@ extern void init_dl_rq(struct dl_rq *dl_rq);
>  
>  extern void cfs_bandwidth_usage_inc(void);
>  extern void cfs_bandwidth_usage_dec(void);
> +void attach_dl_bw(struct rq *rq);
>  
>  #ifdef CONFIG_NO_HZ_COMMON
>  enum rq_nohz_flag_bits {
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] sched/deadline: fix dl bandwidth of root domain overflow after dl task dead

2015-05-06 Thread Juri Lelli
Hi Wanpeng,

I finally got to review this, sorry about the huge delay.

On 07/04/2015 04:36, Wanpeng Li wrote:
 The total used dl bandwidth of each root domain will be reset to 0 after 
 cpu hotplug when rebuild sched domains, since the call path is:
 
 _cpu_down
   cpuset_cpu_inactive() 
 cpuset_update_active_cpus()
   partition_sched_domains()
 build_sched_domains() 
   init_rootdomain() 
 init_dl_bw() 
 
 The bandwidth which dl task occupy will be released when dl task dead,
 it will be minus from total used dl bandwidth of its root domain, 
 however, bandwidth overflow occurs since total used dl bandwidth is 0.
 

Right, that's a bug.

 This patch fix it by attaching the bandwidth which dl task occupy to 
 the new root domain when the task is migrating since cpu hotplug, and
 attach all the used dl bandwidth of dl tasks to the new root domain 
 when sched domains are rebuild.
 

But, I think this fix has still a couple of problems:

 - what happens if a DL task is simply sleeping when domains are
   reconfigured?

 - def_root_domain has now multiple accounting problems, as you do
   this thing even when a cpu is moved there in the cpuoff path

Also, runqueue (and throttling) information are dynamic, while we
are trying to fix a static problem. It's probably not a good idea
mixing them.

I'm not sure how (I need more time to think it through), but can
we maybe fix this using cpuset information?

Thanks,

- Juri

 Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
 ---
  kernel/sched/core.c |  1 +
  kernel/sched/deadline.c | 25 +
  kernel/sched/sched.h|  1 +
  3 files changed, 27 insertions(+)
 
 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
 index 28b0d75..c940999 100644
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
 @@ -5586,6 +5586,7 @@ static void rq_attach_root(struct rq *rq, struct 
 root_domain *rd)
   rq-rd = rd;
  
   cpumask_set_cpu(rq-cpu, rd-span);
 + attach_dl_bw(rq);
   if (cpumask_test_cpu(rq-cpu, cpu_active_mask))
   set_rq_online(rq);
  
 diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
 index 5e95145..62680d7 100644
 --- a/kernel/sched/deadline.c
 +++ b/kernel/sched/deadline.c
 @@ -224,6 +224,7 @@ static void dl_task_offline_migration(struct rq *rq, 
 struct task_struct *p)
  {
   struct rq *later_rq = NULL;
   bool fallback = false;
 + struct dl_bw *dl_b;
  
   later_rq = find_lock_later_rq(p, rq);
  
 @@ -258,6 +259,11 @@ static void dl_task_offline_migration(struct rq *rq, 
 struct task_struct *p)
   set_task_cpu(p, later_rq-cpu);
   activate_task(later_rq, p, ENQUEUE_REPLENISH);
  
 + dl_b = dl_bw_of(later_rq-cpu);
 + raw_spin_lock(dl_b-lock);
 + __dl_add(dl_b, p-dl.dl_bw);
 + raw_spin_unlock(dl_b-lock);
 +
   if (!fallback)
   resched_curr(later_rq);
  
 @@ -1776,6 +1782,25 @@ static void prio_changed_dl(struct rq *rq, struct 
 task_struct *p,
   switched_to_dl(rq, p);
  }
  
 +void attach_dl_bw(struct rq *rq)
 +{
 + struct rb_node *next_node = rq-dl.rb_leftmost;
 + struct sched_dl_entity *dl_se;
 + struct dl_bw *dl_b;
 +
 + dl_b = dl_bw_of(rq-cpu);
 + raw_spin_lock(dl_b-lock);
 +next_node:
 + if (next_node) {
 + dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
 + __dl_add(dl_b, dl_se-dl_bw);
 + next_node = rb_next(next_node);
 +
 + goto next_node;
 + }
 + raw_spin_unlock(dl_b-lock);
 +}
 +
  const struct sched_class dl_sched_class = {
   .next   = rt_sched_class,
   .enqueue_task   = enqueue_task_dl,
 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
 index e0e1299..a7b1a59 100644
 --- a/kernel/sched/sched.h
 +++ b/kernel/sched/sched.h
 @@ -1676,6 +1676,7 @@ extern void init_dl_rq(struct dl_rq *dl_rq);
  
  extern void cfs_bandwidth_usage_inc(void);
  extern void cfs_bandwidth_usage_dec(void);
 +void attach_dl_bw(struct rq *rq);
  
  #ifdef CONFIG_NO_HZ_COMMON
  enum rq_nohz_flag_bits {
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/