Greetings,

I have two bug reports of absurd migration thread CPU usage, one of them
with a link to a bisection..

     https://bugs.gentoo.org/show_bug.cgi?id=394487

..fingering d670ec13 - posix-cpu-timers: Cure SMP wobbles

I reproduced with my -rt kernel and 3.4, but didn't manage to reproduce
with the 3.0 NOPREEMPT kernel it was reported against.

Three options below, two tested in my -rt kernel.

A [ ] stop class doesn't do bean counting
B [ ] stop class counts beans like everybody else
C [ ] none of the above (not appended for brevity:)

I prefer B, elite class tasks eat beans.

A: sched: fix migration thread runtime bogosity

stop class threads don't do bean counting.  Wipe the evidence of their
lowly birth, and don't try to use exec_start which is never updated.

vogelweide:/:[0]# ps l 824
F   UID    PID   PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
1     0    824      2 -100  -      0     0 cpu_st S    ?        2799:06 [migration/57]

Signed-off-by: Mike Galbraith <[email protected]>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 82ad284..82a78a6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -974,6 +974,13 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
                sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
 
                stop->sched_class = &stop_sched_class;
+
+               /* Zero stale values for our non-accountable thread. */
+               stop->se.exec_start = 0;
+               stop->se.sum_exec_runtime = 0;
+               stop->se.prev_sum_exec_runtime = 0;
+               stop->stime = stop->stimescaled = 0;
+               stop->nvcsw = stop->nivcsw = 0;
        }
 
        cpu_rq(cpu)->stop = stop;
@@ -2803,7 +2810,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
        u64 ns = 0;
 
        rq = task_rq_lock(p, &flags);
-       ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+       if (likely(p != rq->stop))
+               ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
        task_rq_unlock(rq, p, &flags);
 
        return ns;


B: sched: fix migration thread runtime bogosity

stop class threads need to do bean counting lest the below happen.

vogelweide:/:[0]# ps l 824
F   UID    PID   PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
1     0    824      2 -100  -      0     0 cpu_st S    ?        2799:06 [migration/57]

Signed-off-by: Mike Galbraith <[email protected]>

diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e8..da5eb5b 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
        struct task_struct *stop = rq->stop;
 
-       if (stop && stop->on_rq)
+       if (stop && stop->on_rq) {
+               stop->se.exec_start = rq->clock_task;
                return stop;
+       }
 
        return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+       struct task_struct *curr = rq->curr;
+       u64 delta_exec;
+
+       delta_exec = rq->clock_task - curr->se.exec_start;
+       if (unlikely((s64)delta_exec < 0))
+               delta_exec = 0;
+
+       schedstat_set(curr->se.statistics.exec_max,
+                       max(curr->se.statistics.exec_max, delta_exec));
+
+       curr->se.sum_exec_runtime += delta_exec;
+       account_group_exec_runtime(curr, delta_exec);
+
+       curr->se.exec_start = rq->clock_task;
+       cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 
 static void set_curr_task_stop(struct rq *rq)
 {
+       struct task_struct *stop = rq->stop;
+
+       stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to