Re: [PATCH 6/7] sched: rt-group: per group period
* Peter Zijlstra <[EMAIL PROTECTED]> wrote:

> Could you please fold this into the 6/7 patch.
>
> It reverts a wandering chunk (the 32768 thing), but more importantly
> it fixes !FAIR_GROUP_SCHED compilation.

done. Btw., there's a new warning:

  kernel/sched_rt.c:197: warning: 'rt_se_boosted' defined but not used

	Ingo
Re: [PATCH 6/7] sched: rt-group: per group period
Could you please fold this into the 6/7 patch.

It reverts a wandering chunk (the 32768 thing), but more importantly
it fixes !FAIR_GROUP_SCHED compilation.

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
---
 kernel/sched.c |   10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -647,7 +647,7 @@ const_debug unsigned int sysctl_sched_rt
  * ratio of time -rt tasks may consume.
  * default: 95%
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 32768; //62259;
+const_debug unsigned int sysctl_sched_rt_ratio = 62259;

 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -5379,6 +5379,7 @@ static void __init sched_rt_period_init(
 	hotcpu_notifier(sched_rt_period_hotplug, 0);
 }

+#ifdef CONFIG_FAIR_GROUP_SCHED
 static void __sched_rt_period_init_tg(void *arg)
 {
 	struct task_group *tg = arg;
@@ -5404,12 +5405,14 @@ static void sched_rt_period_destroy_tg(s
 {
 	on_each_cpu(__sched_rt_period_destroy_tg, tg, 0, 1);
 }
-#else
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+#else /* CONFIG_SMP */

 static void __init sched_rt_period_init(void)
 {
 	sched_rt_period_start_cpu(0);
 }

+#ifdef CONFIG_FAIR_GROUP_SCHED
 static void sched_rt_period_init_tg(struct task_group *tg)
 {
 	sched_rt_period_start(tg->rt_rq[0]);
@@ -5419,7 +5422,8 @@ static void sched_rt_period_destroy_tg(s
 {
 	sched_rt_period_stop(tg->rt_rq[0]);
 }
-#endif
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_SMP */

 #ifdef CONFIG_SMP
 /*
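To make the fix concrete, the following is a sketch of the #ifdef nesting that kernel/sched.c ends up with after this delta: the task-group helpers exist only under CONFIG_FAIR_GROUP_SCHED, in both the SMP and UP branches. Bodies are elided and replaced with comments; this is an illustration, not the literal file contents.

	struct task_group;	/* stub, for illustration only */

	#ifdef CONFIG_SMP

	static void sched_rt_period_init(void)
	{
		/* start per-cpu period timers + register hotplug notifier */
	}

	#ifdef CONFIG_FAIR_GROUP_SCHED
	static void sched_rt_period_init_tg(struct task_group *tg)
	{
		/* on_each_cpu(__sched_rt_period_init_tg, tg, 0, 1) */
	}

	static void sched_rt_period_destroy_tg(struct task_group *tg)
	{
		/* on_each_cpu(__sched_rt_period_destroy_tg, tg, 0, 1) */
	}
	#endif /* CONFIG_FAIR_GROUP_SCHED */

	#else /* CONFIG_SMP */

	static void sched_rt_period_init(void)
	{
		/* UP: only the rt_rq of cpu 0 exists */
	}

	#ifdef CONFIG_FAIR_GROUP_SCHED
	static void sched_rt_period_init_tg(struct task_group *tg)
	{
		/* sched_rt_period_start(tg->rt_rq[0]) */
	}

	static void sched_rt_period_destroy_tg(struct task_group *tg)
	{
		/* sched_rt_period_stop(tg->rt_rq[0]) */
	}
	#endif /* CONFIG_FAIR_GROUP_SCHED */

	#endif /* CONFIG_SMP */

Without the two added #ifdef blocks, a !FAIR_GROUP_SCHED build sees helpers referencing task-group fields that do not exist, hence the compile failure this delta fixes; the same class of config-dependent dead code is behind the 'defined but not used' warning Ingo reports above.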
[PATCH 6/7] sched: rt-group: per group period
Steven asked for per group periods in order to get closer to RMA or EDF
scheduling.

Use the fancy new hrtimers to provide a per group period.

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
---
 include/linux/sched.h    |    2 
 kernel/sched.c           |  225 +--
 kernel/sched_rt.c        |   61 ++--
 kernel/sysctl.c          |    2 
 kernel/time/tick-sched.c |    5 -
 5 files changed, 232 insertions(+), 63 deletions(-)

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -230,8 +230,6 @@ static inline int select_nohz_load_balan
 }
 #endif

-extern unsigned long rt_needs_cpu(int cpu);
-
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -177,6 +177,7 @@ struct task_group {
 	struct rt_rq **rt_rq;

 	unsigned int rt_ratio;
+	ktime_t rt_period;

 	/*
 	 * shares assigned to a task group governs how much of cpu bandwidth
@@ -372,6 +373,7 @@ struct rt_rq {
 #endif
 	int rt_throttled;
 	u64 rt_time;
+	struct hrtimer rt_period_timer;

 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;
@@ -441,8 +443,6 @@ struct rq {

 	struct cfs_rq cfs;
 	struct rt_rq rt;
-	u64 rt_period_expire;
-	int rt_throttled;

 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
@@ -595,23 +595,6 @@ static void update_rq_clock(struct rq *r
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)

-unsigned long rt_needs_cpu(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	u64 delta;
-
-	if (!rq->rt_throttled)
-		return 0;
-
-	if (rq->clock > rq->rt_period_expire)
-		return 1;
-
-	delta = rq->rt_period_expire - rq->clock;
-	do_div(delta, NSEC_PER_SEC / HZ);
-
-	return (unsigned long)delta;
-}
-
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -652,10 +635,10 @@ const_debug unsigned int sysctl_sched_fe
 const_debug unsigned int sysctl_sched_nr_migrate = 32;

 /*
- * period over which we measure -rt task cpu usage in ms.
+ * period over which we measure -rt task cpu usage in us.
  * default: 1s
  */
-const_debug unsigned int sysctl_sched_rt_period = 1000;
+const_debug unsigned int sysctl_sched_rt_period = 1000000;

 #define SCHED_RT_FRAC_SHIFT	16
 #define SCHED_RT_FRAC		(1UL << SCHED_RT_FRAC_SHIFT)
@@ -664,7 +647,7 @@ const_debug unsigned int sysctl_sched_rt
  * ratio of time -rt tasks may consume.
  * default: 95%
  */
-const_debug unsigned int sysctl_sched_rt_ratio = 62259;
+const_debug unsigned int sysctl_sched_rt_ratio = 32768; //62259;

 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
@@ -1245,6 +1228,12 @@ static unsigned long cpu_avg_load_per_ta
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 #endif /* CONFIG_SMP */

+static inline ktime_t ns_to_ktime(u64 ns)
+{
+	static const ktime_t ktime_zero = { .tv64 = 0 };
+	return ktime_add_ns(ktime_zero, ns);
+}
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -3741,7 +3730,6 @@ void scheduler_tick(void)
 	rq->tick_timestamp = rq->clock;
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
-	update_sched_rt_period(rq);
 	spin_unlock(&rq->lock);

 #ifdef CONFIG_SMP
@@ -5287,6 +5275,152 @@ static inline void sched_init_granularit
 	sysctl_sched_batch_wakeup_granularity *= factor;
 }

+static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
+{
+	struct rt_rq *rt_rq =
+		container_of(timer, struct rt_rq, rt_period_timer);
+	struct rq *rq = rq_of_rt_rq(rt_rq);
+	ktime_t now = ktime_get();
+
+	WARN_ON(smp_processor_id() != cpu_of(rq));
+	WARN_ON(!in_irq());
+
+	spin_lock(&rq->lock);
+	update_sched_rt_period(rt_rq);
+	spin_unlock(&rq->lock);
+
+	hrtimer_forward(timer, now, sched_rt_period(rt_rq));
+	return HRTIMER_RESTART;
+}
+
+static void sched_rt_period_start(struct rt_rq *rt_rq)
+{
+	ktime_t period = sched_rt_period(rt_rq);
+
+	WARN_ON(smp_processor_id() != cpu_of(rq_of_rt_rq(rt_rq)));
+
+	for (;;) {
+		ktime_t now = ktime_get();
+		hrtimer_forward(&rt_rq->rt_period_timer, now, period);
+		hrtimer_start(&rt_rq->rt_period_timer,
+			      rt_rq->rt_period_timer.expires,
+			      HRTIMER_MODE_ABS);
+		if (hrtimer_active(&rt_rq->rt_period_timer))
+			break;
+	}
+}
+
+static void sched_rt_period_stop(struct rt_rq *rt_rq)
+{
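A note on the pattern in sched_rt_period_start() above: hrtimer_forward() pushes a timer's expiry forward by whole periods until it lies in the future (returning the number of periods skipped), and the start loop re-arms until hrtimer_active() confirms the timer is queued, since the timer can fire between the forward and the start. Below is a self-contained sketch of the forwarding arithmetic, with illustrative names and plain u64 nanoseconds rather than the kernel's ktime_t implementation.

	#include <stdint.h>
	#include <stdio.h>

	/* Advance *expires past 'now' by whole periods; return overrun count. */
	static uint64_t timer_forward(uint64_t *expires, uint64_t now,
				      uint64_t period)
	{
		uint64_t overruns;

		if (now < *expires)
			return 0;	/* expiry still in the future */

		overruns = (now - *expires) / period + 1;
		*expires += overruns * period;	/* strictly after 'now' */
		return overruns;
	}

	int main(void)
	{
		uint64_t expires = 1000000000ULL;	/* first expiry: 1s */
		uint64_t period  = 1000000000ULL;	/* 1s, cf. sysctl default */

		/* Wake up late at t = 3.5s: expiries at 1s, 2s, 3s all passed. */
		uint64_t overruns = timer_forward(&expires, 3500000000ULL, period);
		printf("overruns=%llu next expiry=%llu\n",
		       (unsigned long long)overruns, (unsigned long long)expires);
		/* prints: overruns=3 next expiry=4000000000 */
		return 0;
	}

The same arithmetic keeps the per-group period timer aligned to its period even when a callback runs late, rather than drifting by the delay.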