Migration may occur when waking up a process, so we must update rq->nr_uninterruptible before set_task_cpu(); otherwise we decrement nr_uninterruptible on the wrong rq. Over time this skews the per-rq accounting: some runqueues accumulate counts that are too large while others go negative.
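
A rough sketch of the pre-patch ordering (simplified for illustration; locking,
WF_MIGRATED handling and the ttwu queueing path are elided) shows how the
increment and the decrement can land on different runqueues when the wakeup
migrates the task:

	/* sleep: deactivate_task() runs with the source rq locked */
	if (task_contributes_to_load(p))
		rq->nr_uninterruptible++;	/* counted on the source rq */

	/* wakeup: try_to_wake_up() may pick another CPU and migrate p */
	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
	if (task_cpu(p) != cpu)
		set_task_cpu(p, cpu);		/* task_rq(p) changes here */

	/* old code: ttwu_do_activate() decrements after the migration */
	if (p->sched_contributes_to_load)
		rq->nr_uninterruptible--;	/* hits the destination rq */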
Also change the type of rq->nr_uninterruptible to atomic_t, since the counter
is now updated in try_to_wake_up() without holding the corresponding
runqueue's lock.

Signed-off-by: Cheng Jian <[email protected]>
---
 kernel/sched/core.c    | 14 +++++++++-----
 kernel/sched/debug.c   |  2 +-
 kernel/sched/loadavg.c |  2 +-
 kernel/sched/sched.h   |  2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f1..4d3bbc1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1198,7 +1198,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
-		rq->nr_uninterruptible--;
+		atomic_dec(&rq->nr_uninterruptible);
 
 	enqueue_task(rq, p, flags);
 
@@ -1210,7 +1210,7 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 	p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
 
 	if (task_contributes_to_load(p))
-		rq->nr_uninterruptible++;
+		atomic_inc(&rq->nr_uninterruptible);
 
 	dequeue_task(rq, p, flags);
 }
@@ -2135,9 +2135,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 	lockdep_assert_held(&rq->lock);
 
 #ifdef CONFIG_SMP
-	if (p->sched_contributes_to_load)
-		rq->nr_uninterruptible--;
-
 	if (wake_flags & WF_MIGRATED)
 		en_flags |= ENQUEUE_MIGRATED;
 #endif
@@ -2500,11 +2497,15 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
 
+	/* update the rq accounting before set_task_cpu() */
 	if (p->in_iowait) {
 		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
+	if (p->sched_contributes_to_load)
+		atomic_dec(&task_rq(p)->nr_uninterruptible);
+
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
@@ -2519,6 +2520,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
+	if (p->sched_contributes_to_load)
+		atomic_dec(&task_rq(p)->nr_uninterruptible);
+
 #endif /* CONFIG_SMP */
 
 	ttwu_queue(p, cpu, wake_flags);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index f7e4579..fa2c1bc 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -641,7 +641,7 @@ do {									\
 	P(nr_running);
 	P(nr_switches);
 	P(nr_load_updates);
-	P(nr_uninterruptible);
+	SEQ_printf(m, "  .%-30s: %d\n", "nr_uninterruptible", atomic_read(&rq->nr_uninterruptible));
 	PN(next_balance);
 	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
 	PN(clock);
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a5165..cae7643 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -81,7 +81,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
 	long nr_active, delta = 0;
 
 	nr_active = this_rq->nr_running - adjust;
-	nr_active += (long)this_rq->nr_uninterruptible;
+	nr_active += (long)atomic_read(&this_rq->nr_uninterruptible);
 
 	if (nr_active != this_rq->calc_load_active) {
 		delta = nr_active - this_rq->calc_load_active;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 802b1f3..8429281 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -890,7 +890,7 @@ struct rq {
 	 * one CPU and if it got migrated afterwards it may decrease
	 * it on another CPU. Always updated under the runqueue lock:
 	 */
-	unsigned long nr_uninterruptible;
+	atomic_t nr_uninterruptible;
 
 	struct task_struct *curr;
 	struct task_struct *idle;
-- 
2.7.4

