On Fri, Mar 24, 2017 at 04:52:55AM +0100, luca abeni wrote:

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d67eee8..952cac8 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -445,16 +445,33 @@ struct sched_dl_entity {
>        *
>        * @dl_yielded tells if task gave up the CPU before consuming
>        * all its available runtime during the last job.
> +      *
> +      * @dl_non_contending tells if task is inactive while still
> +      * contributing to the active utilization. In other words, it
> +      * indicates if the inactive timer has been armed and its handler
> +      * has not been executed yet. This flag is useful to avoid race
> +      * conditions between the inactive timer handler and the wakeup
> +      * code.
>        */
>       int                             dl_throttled;
>       int                             dl_boosted;
>       int                             dl_yielded;
> +     int                             dl_non_contending;

We should maybe look into making those bits :1, though that's not
something for this patch.
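
Purely as an untested sketch of what I mean (assuming all of these are
only ever written under the rq lock, so the read-modify-write is safe):

	unsigned int			dl_throttled      : 1;
	unsigned int			dl_boosted        : 1;
	unsigned int			dl_yielded        : 1;
	unsigned int			dl_non_contending : 1;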


> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index cef9adb..86aed82 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -65,6 +65,107 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
>               dl_rq->running_bw = 0;
>  }
>  
> +void dl_change_utilization(struct task_struct *p, u64 new_bw)
> +{
> +     if (!task_on_rq_queued(p)) {
> +             struct rq *rq = task_rq(p);
> +
> +             if (p->dl.dl_non_contending) {
> +                     sub_running_bw(p->dl.dl_bw, &rq->dl);
> +                     p->dl.dl_non_contending = 0;
> +                     /*
> +                      * If the timer handler is currently running and the
> +                      * timer cannot be cancelled, inactive_task_timer()
> +                      * will see that dl_non_contending is not set, and
> +                      * will not touch the rq's active utilization,
> +                      * so we are still safe.
> +                      */
> +                     if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
> +                             put_task_struct(p);
> +             }
> +     }
> +}

If we rearrange that slightly, we can avoid the double indent:


--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -67,23 +67,23 @@ void sub_running_bw(u64 dl_bw, struct dl
 
 void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
-       if (!task_on_rq_queued(p)) {
-               struct rq *rq = task_rq(p);
+       if (task_on_rq_queued(p))
+               return;
 
-               if (p->dl.dl_non_contending) {
-                       sub_running_bw(p->dl.dl_bw, &rq->dl);
-                       p->dl.dl_non_contending = 0;
-                       /*
-                        * If the timer handler is currently running and the
-                        * timer cannot be cancelled, inactive_task_timer()
-                        * will see that dl_non_contending is not set, and
-                        * will not touch the rq's active utilization,
-                        * so we are still safe.
-                        */
-                       if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
-                               put_task_struct(p);
-               }
-       }
+       if (!p->dl.dl_non_contending)
+               return;
+
+       sub_running_bw(p->dl.dl_bw, &task_rq(p)->dl);
+       p->dl.dl_non_contending = 0;
+       /*
+        * If the timer handler is currently running and the
+        * timer cannot be cancelled, inactive_task_timer()
+        * will see that dl_non_contending is not set, and
+        * will not touch the rq's active utilization,
+        * so we are still safe.
+        */
+       if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
+               put_task_struct(p);
 }
 
 static void task_non_contending(struct task_struct *p)

> +
> +static void task_non_contending(struct task_struct *p)
> +{

> +}
> +
> +static void task_contending(struct sched_dl_entity *dl_se)
> +{
> +     struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
> +
> +     /*
> +      * If this is a non-deadline task that has been boosted,
> +      * do nothing
> +      */
> +     if (dl_se->dl_runtime == 0)
> +             return;
> +
> +     if (dl_se->dl_non_contending) {
> +             /*
> +              * If the timer handler is currently running and the
> +              * timer cannot be cancelled, inactive_task_timer()
> +              * will see that dl_non_contending is not set, and
> +              * will not touch the rq's active utilization,
> +              * so we are still safe.
> +              */
> +             if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
> +                     put_task_struct(dl_task_of(dl_se));
> +             dl_se->dl_non_contending = 0;

This had me worried for a little while as a possible use-after-free,
but this put_task_struct() cannot drop the last reference in this
case. Might be worth a comment.
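
Something along these lines, perhaps (my reading of why it is safe;
do double-check):

	/*
	 * The task is in the process of waking up, so whoever got us
	 * here still holds a reference on it; dropping the timer's
	 * reference below therefore cannot be the last
	 * put_task_struct().
	 */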

> +     } else {
> +             /*
> +              * Since "dl_non_contending" is not set, the
> +              * task's utilization has already been removed from
> +              * active utilization (either when the task blocked,
> +              * or when the "inactive timer" fired).
> +              * So, add it back.
> +              */
> +             add_running_bw(dl_se->dl_bw, dl_rq);
> +     }
> +}

In general I feel it would be nice to have a state diagram included
somewhere near these two functions, so we don't have to dig out the
PDF every time.
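
Something like the below, maybe (my attempt at reconstructing the
rules from this patch; please correct where I got them wrong):

                 wakeup (cancel inactive timer)
         +---------------------------------------------+
         |                                             |
         v      blocks, 0-lag in the future            |
   +------------+    (arm inactive timer)      +-------+--------+
   |   ACTIVE   | ---------------------------> |     ACTIVE     |
   | contending |                              | non contending |
   +------------+                              +----------------+
      ^      |                                         |
      |      | blocks, 0-lag passed                    | inactive timer fires
      |      | (sub_running_bw())                      | (sub_running_bw())
      |      v                                         v
      |   +----------------------------------------------+
      +---|                   INACTIVE                   |
  wakeup  +----------------------------------------------+
  (add_running_bw())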

> +static void migrate_task_rq_dl(struct task_struct *p)
> +{
> +     if ((p->state == TASK_WAKING) && (p->dl.dl_non_contending)) {
> +             struct rq *rq = task_rq(p);
> +
> +             raw_spin_lock(&rq->lock);
> +             sub_running_bw(p->dl.dl_bw, &rq->dl);
> +             p->dl.dl_non_contending = 0;
> +             /*
> +              * If the timer handler is currently running and the
> +              * timer cannot be cancelled, inactive_task_timer()
> +              * will see that dl_non_contending is not set, and
> +              * will not touch the rq's active utilization,
> +              * so we are still safe.
> +              */
> +             if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
> +                     put_task_struct(p);
> +
> +             raw_spin_unlock(&rq->lock);
> +     }
> +}

This can similarly be reduced in indent, though it is only a single
level here, so not as onerous as the other one.
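
Completely untested, but something like this is what I had in mind:

static void migrate_task_rq_dl(struct task_struct *p)
{
	struct rq *rq;

	if (p->state != TASK_WAKING || !p->dl.dl_non_contending)
		return;

	rq = task_rq(p);

	raw_spin_lock(&rq->lock);
	sub_running_bw(p->dl.dl_bw, &rq->dl);
	p->dl.dl_non_contending = 0;
	/*
	 * If the timer handler is currently running and the
	 * timer cannot be cancelled, inactive_task_timer()
	 * will see that dl_non_contending is not set, and
	 * will not touch the rq's active utilization,
	 * so we are still safe.
	 */
	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
		put_task_struct(p);
	raw_spin_unlock(&rq->lock);
}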

What had me puzzled for a while here is taking the lock, because some
callers of set_task_cpu() must in fact hold this lock already. Worth a
comment, I feel.
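
Something like this, maybe (my understanding of the locking; please
double-check):

	/*
	 * The p->state == TASK_WAKING test means we got here through
	 * ttwu(), which holds p->pi_lock but not the old rq->lock;
	 * the set_task_cpu() callers that do hold rq->lock never see
	 * TASK_WAKING, so taking the lock here is safe and required.
	 */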

Once I figured out the exact locking, I realized you do this on
cross-CPU wakeups. We spend a fair amount of effort not to take both
rq locks, but I suppose you have to here.
