[PATCH 1/7] sched: rt throttling vs no_hz

2008-01-04 Thread Peter Zijlstra
We need to teach no_hz about the rt throttling because it's tick driven.

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
---
 include/linux/sched.h|2 ++
 kernel/sched.c   |   23 ++-
 kernel/sched_rt.c|   30 --
 kernel/time/tick-sched.c |5 +
 4 files changed, 45 insertions(+), 15 deletions(-)

Index: linux-2.6/include/linux/sched.h
===
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -230,6 +230,8 @@ static inline int select_nohz_load_balan
 }
 #endif
 
+extern unsigned long rt_needs_cpu(int cpu);
+
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
Index: linux-2.6/kernel/sched.c
===
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -442,6 +442,7 @@ struct rq {
struct cfs_rq cfs;
struct rt_rq rt;
u64 rt_period_expire;
+   int rt_throttled;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
@@ -594,6 +595,23 @@ static void update_rq_clock(struct rq *r
 #define task_rq(p) cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)  (cpu_rq(cpu)->curr)
 
+unsigned long rt_needs_cpu(int cpu)
+{
+   struct rq *rq = cpu_rq(cpu);
+   u64 delta;
+
+   if (!rq->rt_throttled)
+   return 0;
+
+   if (rq->clock > rq->rt_period_expire)
+   return 1;
+
+   delta = rq->rt_period_expire - rq->clock;
+   do_div(delta, NSEC_PER_SEC / HZ);
+
+   return (unsigned long)delta;
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -7099,9 +7117,11 @@ static void init_rt_rq(struct rt_rq *rt_
/* delimiter for bitsearch: */
__set_bit(MAX_RT_PRIO, array->bitmap);
 
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+   rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
 #ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
-   rt_rq->highest_prio = MAX_RT_PRIO;
rt_rq->overloaded = 0;
 #endif
 
@@ -7186,6 +7206,7 @@ void __init sched_init(void)
list_add(&init_task_group.list, &task_groups);
 #endif
rq->rt_period_expire = 0;
+   rq->rt_throttled = 0;
 
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
Index: linux-2.6/kernel/sched_rt.c
===
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -175,7 +175,11 @@ static int sched_rt_ratio_exceeded(struc
ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
 
if (rt_rq->rt_time > ratio) {
+   struct rq *rq = rq_of_rt_rq(rt_rq);
+
+   rq->rt_throttled = 1;
rt_rq->rt_throttled = 1;
+
sched_rt_ratio_dequeue(rt_rq);
return 1;
}
@@ -183,18 +187,6 @@ static int sched_rt_ratio_exceeded(struc
return 0;
 }
 
-static void __update_sched_rt_period(struct rt_rq *rt_rq, u64 period)
-{
-   unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-   u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-   rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-   if (rt_rq->rt_throttled) {
-   rt_rq->rt_throttled = 0;
-   sched_rt_ratio_enqueue(rt_rq);
-   }
-}
-
 static void update_sched_rt_period(struct rq *rq)
 {
struct rt_rq *rt_rq;
@@ -204,8 +196,18 @@ static void update_sched_rt_period(struc
period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
rq->rt_period_expire += period;
 
-   for_each_leaf_rt_rq(rt_rq, rq)
-   __update_sched_rt_period(rt_rq, period);
+   for_each_leaf_rt_rq(rt_rq, rq) {
+   unsigned long rt_ratio = sched_rt_ratio(rt_rq);
+   u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+   rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+   if (rt_rq->rt_throttled) {
+   rt_rq->rt_throttled = 0;
+   sched_rt_ratio_enqueue(rt_rq);
+   }
+   }
+
+   rq->rt_throttled = 0;
}
 }
 
Index: linux-2.6/kernel/time/tick-sched.c
===
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void)
 void tick_nohz_stop_sched_tick(void)
 {
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+   unsigned long rt_jiffies;
struct tick_sched *ts;
ktime_t last_update, expires, now, delta;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void)

[PATCH 1/7] sched: rt throttling vs no_hz

2008-01-04 Thread Peter Zijlstra
We need to teach no_hz about the rt throttling because it's tick driven.

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
---
 include/linux/sched.h|2 ++
 kernel/sched.c   |   23 ++-
 kernel/sched_rt.c|   30 --
 kernel/time/tick-sched.c |5 +
 4 files changed, 45 insertions(+), 15 deletions(-)

Index: linux-2.6/include/linux/sched.h
===
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -230,6 +230,8 @@ static inline int select_nohz_load_balan
 }
 #endif
 
+extern unsigned long rt_needs_cpu(int cpu);
+
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
Index: linux-2.6/kernel/sched.c
===
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -442,6 +442,7 @@ struct rq {
struct cfs_rq cfs;
struct rt_rq rt;
u64 rt_period_expire;
+   int rt_throttled;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
@@ -594,6 +595,23 @@ static void update_rq_clock(struct rq *r
 #define task_rq(p) cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)  (cpu_rq(cpu)->curr)
 
+unsigned long rt_needs_cpu(int cpu)
+{
+   struct rq *rq = cpu_rq(cpu);
+   u64 delta;
+
+   if (!rq->rt_throttled)
+   return 0;
+
+   if (rq->clock > rq->rt_period_expire)
+   return 1;
+
+   delta = rq->rt_period_expire - rq->clock;
+   do_div(delta, NSEC_PER_SEC / HZ);
+
+   return (unsigned long)delta;
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -7099,9 +7117,11 @@ static void init_rt_rq(struct rt_rq *rt_
/* delimiter for bitsearch: */
__set_bit(MAX_RT_PRIO, array->bitmap);
 
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+   rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
 #ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
-   rt_rq->highest_prio = MAX_RT_PRIO;
rt_rq->overloaded = 0;
 #endif
 
@@ -7186,6 +7206,7 @@ void __init sched_init(void)
list_add(&init_task_group.list, &task_groups);
 #endif
rq->rt_period_expire = 0;
+   rq->rt_throttled = 0;
 
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
Index: linux-2.6/kernel/sched_rt.c
===
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -175,7 +175,11 @@ static int sched_rt_ratio_exceeded(struc
ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
 
if (rt_rq->rt_time > ratio) {
+   struct rq *rq = rq_of_rt_rq(rt_rq);
+
+   rq->rt_throttled = 1;
rt_rq->rt_throttled = 1;
+
sched_rt_ratio_dequeue(rt_rq);
return 1;
}
@@ -183,18 +187,6 @@ static int sched_rt_ratio_exceeded(struc
return 0;
 }
 
-static void __update_sched_rt_period(struct rt_rq *rt_rq, u64 period)
-{
-   unsigned long rt_ratio = sched_rt_ratio(rt_rq);
-   u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-   rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
-   if (rt_rq->rt_throttled) {
-   rt_rq->rt_throttled = 0;
-   sched_rt_ratio_enqueue(rt_rq);
-   }
-}
-
 static void update_sched_rt_period(struct rq *rq)
 {
struct rt_rq *rt_rq;
@@ -204,8 +196,18 @@ static void update_sched_rt_period(struc
period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
rq->rt_period_expire += period;
 
-   for_each_leaf_rt_rq(rt_rq, rq)
-   __update_sched_rt_period(rt_rq, period);
+   for_each_leaf_rt_rq(rt_rq, rq) {
+   unsigned long rt_ratio = sched_rt_ratio(rt_rq);
+   u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+   rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+   if (rt_rq->rt_throttled) {
+   rt_rq->rt_throttled = 0;
+   sched_rt_ratio_enqueue(rt_rq);
+   }
+   }
+
+   rq->rt_throttled = 0;
}
 }
 
Index: linux-2.6/kernel/time/tick-sched.c
===
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -153,6 +153,7 @@ void tick_nohz_update_jiffies(void)
 void tick_nohz_stop_sched_tick(void)
 {
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+   unsigned long rt_jiffies;
struct tick_sched *ts;
ktime_t last_update, expires, now, delta;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
@@ -216,6 +217,10 @@ void tick_nohz_stop_sched_tick(void)
next_jiffies =