sched: rt-group: deal with PI

Linux Kernel Mailing List Wed, 13 Feb 2008 12:00:11 -0800

Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=23b0fdfc9299b137bd126e9dc22f62a59dae546d
Commit:     23b0fdfc9299b137bd126e9dc22f62a59dae546d
Parent:     4cf5d77a6eefaa7a464bc34e8cb767356f10fd74
Author:     Peter Zijlstra <[EMAIL PROTECTED]>
AuthorDate: Wed Feb 13 15:45:39 2008 +0100
Committer:  Ingo Molnar <[EMAIL PROTECTED]>
CommitDate: Wed Feb 13 15:45:39 2008 +0100


    sched: rt-group: deal with PI
    
    Steven mentioned the fun case where a lock holding task will be throttled.
    
    Simple fix: allow groups that have boosted tasks to run anyway.
    
    If a runnable task in a throttled group gets boosted the dequeue/enqueue
    done by rt_mutex_setprio() is enough to unthrottle the group.
    
    This is ofcourse not quite correct. Two possible ways forward are:
      - second prio array for boosted tasks
      - boost to a prio ceiling (this would also work for deadline scheduling)
    
    Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
    Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
---
 kernel/sched.c    |    3 +++
 kernel/sched_rt.c |   43 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 88a17c7..cecaea6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -362,6 +362,8 @@ struct rt_rq {
        u64 rt_time;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+       unsigned long rt_nr_boosted;
+
        struct rq *rq;
        struct list_head leaf_rt_rq_list;
        struct task_group *tg;
@@ -7112,6 +7114,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
        rt_rq->rt_throttled = 0;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+       rt_rq->rt_nr_boosted = 0;
        rt_rq->rq = rq;
 #endif
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 274b40d..8d42693 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -110,6 +110,23 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
                dequeue_rt_entity(rt_se);
 }
 
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+       return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
+}
+
+static int rt_se_boosted(struct sched_rt_entity *rt_se)
+{
+       struct rt_rq *rt_rq = group_rt_rq(rt_se);
+       struct task_struct *p;
+
+       if (rt_rq)
+               return !!rt_rq->rt_nr_boosted;
+
+       p = rt_task_of(rt_se);
+       return p->prio != p->normal_prio;
+}
+
 #else
 
 static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
@@ -149,6 +166,10 @@ static inline void sched_rt_ratio_dequeue(struct rt_rq 
*rt_rq)
 {
 }
 
+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+       return rt_rq->rt_throttled;
+}
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -172,7 +193,7 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
                return 0;
 
        if (rt_rq->rt_throttled)
-               return 1;
+               return rt_rq_throttled(rt_rq);
 
        period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
        ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
@@ -183,8 +204,10 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
                rq->rt_throttled = 1;
                rt_rq->rt_throttled = 1;
 
-               sched_rt_ratio_dequeue(rt_rq);
-               return 1;
+               if (rt_rq_throttled(rt_rq)) {
+                       sched_rt_ratio_dequeue(rt_rq);
+                       return 1;
+               }
        }
 
        return 0;
@@ -265,6 +288,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct 
rt_rq *rt_rq)
 
        update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       if (rt_se_boosted(rt_se))
+               rt_rq->rt_nr_boosted++;
+#endif
 }
 
 static inline
@@ -295,6 +322,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct 
rt_rq *rt_rq)
 
        update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       if (rt_se_boosted(rt_se))
+               rt_rq->rt_nr_boosted--;
+
+       WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
+#endif
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -303,7 +336,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
        struct rt_prio_array *array = &rt_rq->active;
        struct rt_rq *group_rq = group_rt_rq(rt_se);
 
-       if (group_rq && group_rq->rt_throttled)
+       if (group_rq && rt_rq_throttled(group_rq))
                return;
 
        list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
@@ -496,7 +529,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
        if (unlikely(!rt_rq->rt_nr_running))
                return NULL;
 
-       if (sched_rt_ratio_exceeded(rt_rq))
+       if (rt_rq_throttled(rt_rq))
                return NULL;
 
        do {
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

sched: rt-group: deal with PI

Reply via email to