From: luca abeni <luca.ab...@santannapisa.it>

Currently, the SCHED_DEADLINE scheduler uses a global EDF scheduling
algorithm, migrating tasks to CPU cores without considering the core
capacity and the task utilization. This works well on homogeneous
systems (SCHED_DEADLINE tasks are guaranteed to have a bounded
tardiness), but presents some issues on heterogeneous systems. For
example, a SCHED_DEADLINE task might be migrated to a core that does
not have enough processing capacity to correctly serve it (think about
a task with runtime 70ms and period 100ms migrated to a core with
processing capacity 0.5: that core can provide at most 50ms of
execution time every 100ms, less than the 70ms the task needs).
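As an illustration of this fitness condition (not part of the patch;
the helper name and the stand-alone setting below are made up for the
example), the same fixed-point check used by the dl_task_fit() helper
added further down can be reproduced in a few lines of user-space C on
the kernel's 1024-based capacity scale:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10

/*
 * Same comparison as dl_task_fit(): the task fits on the core iff
 * (deadline * capacity) >> 10 >= runtime, i.e. runtime/deadline <= capacity/1024.
 */
static int fits(unsigned long long runtime_ns, unsigned long long deadline_ns,
		unsigned long long cap)
{
	return ((deadline_ns * cap) >> SCHED_CAPACITY_SHIFT) >= runtime_ns;
}

int main(void)
{
	/* runtime 70ms, relative deadline (= period) 100ms, in nanoseconds */
	unsigned long long runtime = 70000000ULL, deadline = 100000000ULL;

	/* a 0.5-capacity core is 512 on the kernel's [0, 1024] scale */
	printf("cap 1024: fit=%d\n", fits(runtime, deadline, 1024));	/* fit=1 */
	printf("cap  512: fit=%d\n", fits(runtime, deadline, 512));	/* fit=0 */

	return 0;
}

With capacity 1024 the 70ms/100ms task fits (70 <= 100), while with
capacity 512 it does not (70 > 50), which is exactly the placement the
patch tries to avoid.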
This commit is a first step to address the issue: When a task wakes up
or migrates away from a CPU core, the scheduler tries to find an idle
core having enough processing capacity to serve the task.

Signed-off-by: luca abeni <luca.ab...@santannapisa.it>
---
 kernel/sched/cpudeadline.c | 31 +++++++++++++++++++++++++++++--
 kernel/sched/deadline.c    |  8 ++++++--
 kernel/sched/sched.h       |  7 ++++++-
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 50316455ea66..d21f7905b9c1 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -110,6 +110,22 @@ static inline int cpudl_maximum(struct cpudl *cp)
 	return cp->elements[0].cpu;
 }
 
+static inline int dl_task_fit(const struct sched_dl_entity *dl_se,
+			      int cpu, u64 *c)
+{
+	u64 cap = (arch_scale_cpu_capacity(NULL, cpu) * arch_scale_freq_capacity(cpu)) >> SCHED_CAPACITY_SHIFT;
+	s64 rel_deadline = dl_se->dl_deadline;
+	u64 rem_runtime  = dl_se->dl_runtime;
+
+	if (c)
+		*c = cap;
+
+	if ((rel_deadline * cap) >> SCHED_CAPACITY_SHIFT < rem_runtime)
+		return 0;
+
+	return 1;
+}
+
 /*
  * cpudl_find - find the best (later-dl) CPU in the system
  * @cp: the cpudl max-heap context
@@ -125,8 +141,19 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 
 	if (later_mask &&
 	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
-		return 1;
-	} else {
+		int cpu;
+
+		for_each_cpu(cpu, later_mask) {
+			u64 cap;
+
+			if (!dl_task_fit(&p->dl, cpu, &cap))
+				cpumask_clear_cpu(cpu, later_mask);
+		}
+
+		if (!cpumask_empty(later_mask))
+			return 1;
+	}
+	{
 		int best_cpu = cpudl_maximum(cp);
 
 		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5b981eeeb944..3436f3d8fa8f 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1584,6 +1584,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	if (sd_flag != SD_BALANCE_WAKE)
 		goto out;
 
+	if (dl_entity_is_special(&p->dl))
+		goto out;
+
 	rq = cpu_rq(cpu);
 
 	rcu_read_lock();
@@ -1598,10 +1601,11 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 	 * other hand, if it has a shorter deadline, we
 	 * try to make it stay here, it might be important.
 	 */
-	if (unlikely(dl_task(curr)) &&
+	if ((unlikely(dl_task(curr)) &&
 	    (curr->nr_cpus_allowed < 2 ||
 	     !dl_entity_preempt(&p->dl, &curr->dl)) &&
-	    (p->nr_cpus_allowed > 1)) {
+	    (p->nr_cpus_allowed > 1)) ||
+	    static_branch_unlikely(&sched_asym_cpucapacity)) {
 		int target = find_later_rq(p);
 
 		if (target != -1 &&
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 32d242694863..e5f9fd3aee80 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2367,7 +2367,12 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
 
 static inline unsigned long cpu_bw_dl(struct rq *rq)
 {
-	return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
+	unsigned long res;
+
+	res = (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
+
+	return (res << SCHED_CAPACITY_SHIFT) /
+	       arch_scale_cpu_capacity(NULL, rq->cpu);
 }
 
 static inline unsigned long cpu_util_dl(struct rq *rq)
-- 
2.20.1
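For reference, a small stand-alone sketch of what the cpu_bw_dl()
change above does to the utilization reported to schedutil (the
constants mirror the kernel's BW_SHIFT and SCHED_CAPACITY_SHIFT
values, but the helpers and the example reservation are made up for
illustration, not part of the patch):

#include <stdio.h>

#define BW_SHIFT		20	/* deadline bandwidth is in units of 2^-20 */
#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* Old cpu_bw_dl(): utilization relative to a full-capacity (1024) CPU. */
static unsigned long bw_dl_old(unsigned long long running_bw)
{
	return (running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
}

/*
 * New cpu_bw_dl(): the same bandwidth divided by the core's capacity, so a
 * smaller core reports a larger utilization to schedutil.
 */
static unsigned long bw_dl_new(unsigned long long running_bw, unsigned long cap)
{
	return (bw_dl_old(running_bw) << SCHED_CAPACITY_SHIFT) / cap;
}

int main(void)
{
	/* running_bw of a 25% reservation (e.g. runtime 25ms, period 100ms) */
	unsigned long long running_bw = 1ULL << (BW_SHIFT - 2);

	printf("cap 1024: %lu/1024\n", bw_dl_new(running_bw, 1024));	/* 256 */
	printf("cap  512: %lu/1024\n", bw_dl_new(running_bw, 512));	/* 512 */

	return 0;
}

The same 25% reservation is reported as 256/1024 on a full-capacity
core but as 512/1024 on a 0.5-capacity core, so schedutil requests a
proportionally higher frequency on the smaller CPU.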