When the runtime is exhausted in an RT CGroup, the scheduler checks for another non-throttled runqueue and, if available, migrates the tasks.
The bandwidth (runtime/period) chosen for a certain CGroup is replicated on every core of the system, therefore, in an SMP system with M cores, the total available bandwidth is the given runtime/period multiplied by M. Signed-off-by: Andrea Parri <[email protected]> Signed-off-by: Luca Abeni <[email protected]> Cc: Tommaso Cucinotta <[email protected]> Cc: Juri Lelli <[email protected]> Cc: Daniel Bristot de Oliveira <[email protected]> Cc: Steven Rostedt <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Peter Zijlstra <[email protected]> Signed-off-by: Alessio Balsini <[email protected]> --- kernel/sched/deadline.c | 58 ++++++++++++++++ kernel/sched/rt.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 6 ++ 3 files changed, 235 insertions(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9a1988b..22c35c0 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -247,6 +247,61 @@ static DEFINE_PER_CPU(struct callback_head, dl_pull_head); static void push_dl_tasks(struct rq *); static void pull_dl_task(struct rq *); +#ifdef CONFIG_RT_GROUP_SCHED +static DEFINE_PER_CPU(struct callback_head, group_pull_head); +static DEFINE_PER_CPU(struct callback_head, group_push_head); + +static void dl_push_group_tasks(struct rq *rq) +{ + BUG_ON(rq->dl.rq_to_push_from == NULL); + + if ((rq->dl.rq_to_push_from->rt_nr_running > 1) || + (dl_group_of(rq->dl.rq_to_push_from)->dl_throttled == 1)) { + group_push_rt_task(rq->dl.rq_to_push_from); + } + + rq->dl.rq_to_push_from = NULL; +} + +static void dl_pull_group_tasks(struct rq *rq) +{ + BUG_ON(rq->dl.rq_to_pull_to == NULL); + BUG_ON(rq->dl.rq_to_pull_to->rq != rq); + + group_pull_rt_task(rq->dl.rq_to_pull_to); + rq->dl.rq_to_pull_to = NULL; +} + +void queue_push_from_group(struct rq *rq, struct rt_rq *rt_rq, int reason) +{ + BUG_ON(rt_rq == NULL); + BUG_ON(rt_rq->rq != rq); + + if (rq->dl.rq_to_push_from) + return; + + rq->dl.rq_to_push_from = 
rt_rq; + queue_balance_callback(rq, &per_cpu(group_push_head, rq->cpu), + dl_push_group_tasks); +} + +void queue_pull_to_group(struct rq *rq, struct rt_rq *rt_rq) +{ + struct sched_dl_entity *dl_se = dl_group_of(rt_rq); + + BUG_ON(rt_rq == NULL); + BUG_ON(!is_dl_group(rt_rq)); + BUG_ON(rt_rq->rq != rq); + + if (dl_se->dl_throttled || rq->dl.rq_to_pull_to) + return; + + rq->dl.rq_to_pull_to = rt_rq; + queue_balance_callback(rq, &per_cpu(group_pull_head, rq->cpu), + dl_pull_group_tasks); +} +#endif + static inline void queue_push_tasks(struct rq *rq) { if (!has_pushable_dl_tasks(rq)) @@ -626,6 +681,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); +#ifdef CONFIG_SMP + group_pull_rt_task(rt_rq); +#endif dl_se->dl_throttled = 0; if (rt_rq->rt_nr_running) { enqueue_dl_entity(dl_se, dl_se, ENQUEUE_REPLENISH); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f38bd4b..dbdb0bc 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -914,6 +914,14 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rt_rq, p); +#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_SMP) + if (is_dl_group(rt_rq)) { + struct sched_dl_entity *dl_se = dl_group_of(rt_rq); + + if (dl_se->dl_throttled) + queue_push_from_group(rq, rt_rq, 2); + } +#endif } #ifdef CONFIG_SMP @@ -1532,16 +1540,173 @@ static void pull_rt_task(struct rq *this_rq) } #ifdef CONFIG_RT_GROUP_SCHED +struct rt_rq *group_find_lock_rt_rq(struct task_struct *task, + struct rt_rq *rt_rq) +{ + struct rq *rq = rq_of_rt_rq(rt_rq), *first_rq; + struct sched_dl_entity *first_dl_se; + struct rt_rq *first_rt_rq = NULL; + int cpu, tries; + + BUG_ON(!is_dl_group(rt_rq)); + + for_each_possible_cpu(cpu) { + if (cpu == -1) + continue; + if (cpu == rq->cpu) + continue; + + first_dl_se = rt_rq->tg->dl_se[cpu]; + first_rt_rq = first_dl_se->my_q; + first_rq = rq_of_rt_rq(first_rt_rq); + + 
tries = 0; +retry_cpu_push: + if (++tries > RT_MAX_TRIES) { + first_rt_rq = NULL; + continue; + } + + if (first_dl_se->dl_throttled) { + first_rt_rq = NULL; + continue; + } + + if (double_lock_balance(rq, first_rq)) { + + if (unlikely(task_rq(task) != rq || + task_running(rq, task) || + !task->on_rq)) { + double_unlock_balance(rq, first_rq); + + return NULL; + } + + if (unlikely(!cpumask_test_cpu(first_rq->cpu, + &task->cpus_allowed) || + first_dl_se->dl_throttled)) { + double_unlock_balance(rq, first_rq); + + goto retry_cpu_push; + } + } + + if (first_rt_rq->highest_prio.curr > task->prio) + break; + + double_unlock_balance(rq, first_rq); + first_rt_rq = NULL; + } + + return first_rt_rq; +} + +int group_push_rt_task_from_group(struct rt_rq *rt_rq) +{ + struct rq *rq = rq_of_rt_rq(rt_rq), *first_rq; + struct rt_rq *first_rt_rq; + struct task_struct *p; + int tries = 0; + +try_another_task: + p = pick_next_pushable_task(rt_rq); + if (!p) + return 0; + + get_task_struct(p); + + first_rt_rq = group_find_lock_rt_rq(p, rt_rq); + if (!first_rt_rq) { + put_task_struct(p); + + if (tries++ > RT_MAX_TRIES) + return 0; + + goto try_another_task; + } + + first_rq = rq_of_rt_rq(first_rt_rq); + + deactivate_task(rq, p, 0); + set_task_cpu(p, first_rq->cpu); + activate_task(first_rq, p, 0); + + resched_curr(first_rq); + + double_unlock_balance(rq, first_rq); + put_task_struct(p); + + return 1; +} + +int group_pull_rt_task_from_group(struct rt_rq *this_rt_rq) +{ + struct rq *this_rq = rq_of_rt_rq(this_rt_rq), *src_rq; + struct sched_dl_entity *this_dl_se, *src_dl_se; + struct rt_rq *src_rt_rq; + struct task_struct *p; + int this_cpu = this_rq->cpu, cpu, tries = 0, ret = 0; + + this_dl_se = dl_group_of(this_rt_rq); + for_each_possible_cpu(cpu) { + if (cpu == -1) + continue; + if (cpu == this_rq->cpu) + continue; + + src_dl_se = this_rt_rq->tg->dl_se[cpu]; + src_rt_rq = src_dl_se->my_q; + + if ((src_rt_rq->rt_nr_running <= 1) && !src_dl_se->dl_throttled) + continue; + + src_rq = 
rq_of_rt_rq(src_rt_rq); + + if (++tries > RT_MAX_TRIES) + continue; + + double_lock_balance(this_rq, src_rq); + + p = pick_highest_pushable_task(src_rt_rq, this_cpu); + + if (p && (p->prio < this_rt_rq->highest_prio.curr)) { + WARN_ON(p == src_rq->curr); + WARN_ON(!p->on_rq); + + ret = 1; + + deactivate_task(src_rq, p, 0); + set_task_cpu(p, this_cpu); + activate_task(this_rq, p, 0); + } + double_unlock_balance(this_rq, src_rq); + } + + return ret; +} + int group_push_rt_task(struct rt_rq *rt_rq) { struct rq *rq = rq_of_rt_rq(rt_rq); if (is_dl_group(rt_rq)) - return 0; + return group_push_rt_task_from_group(rt_rq); return push_rt_task(rq); } +int group_pull_rt_task(struct rt_rq *this_rt_rq) +{ + struct rq *this_rq = rq_of_rt_rq(this_rt_rq); + + if (is_dl_group(this_rt_rq)) + return group_pull_rt_task_from_group(this_rt_rq); + + pull_rt_task(this_rq); + + return 1; +} + void group_push_rt_tasks(struct rt_rq *rt_rq) { while (group_push_rt_task(rt_rq)) @@ -1609,6 +1774,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) #ifndef CONFIG_RT_GROUP_SCHED queue_pull_task(rq); +#else + queue_pull_to_group(rq, rt_rq); #endif } @@ -1644,6 +1811,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) queue_push_tasks(rq); #else if (rt_rq_of_se(&p->rt)->overloaded) { + queue_push_from_group(rq, rt_rq_of_se(&p->rt), 3); } else { if (p->prio < rq->curr->prio) resched_curr(rq); @@ -1678,6 +1846,8 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) if (oldprio < p->prio) #ifndef CONFIG_RT_GROUP_SCHED queue_pull_task(rq); +#else + queue_pull_to_group(rq, rt_rq); #endif /* * If there's a higher priority task waiting to run diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 528b41c..9dc8488 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2073,6 +2073,12 @@ int group_pull_rt_task(struct rt_rq *rt_rq); int group_push_rt_task(struct rt_rq *rt_rq); struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, 
struct rt_rq *rt_rq); + +#ifdef CONFIG_RT_GROUP_SCHED +void queue_push_from_group(struct rq *rq, struct rt_rq *rt_rq, int reason); +void queue_pull_to_group(struct rq *rq, struct rt_rq *rt_rq); +#endif + #if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_SMP) void dequeue_pushable_task(struct rt_rq *rt_rq, struct task_struct *p); #else -- 2.7.4

