Gitweb: http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b82d9fdd848abfbe7263a4ecd9bbb55e575100a6 Commit: b82d9fdd848abfbe7263a4ecd9bbb55e575100a6 Parent: 3c90e6e99b08f01d5684a3a07cceae6a543e4fa8 Author: Peter Zijlstra <[EMAIL PROTECTED]> AuthorDate: Fri Nov 9 22:39:39 2007 +0100 Committer: Ingo Molnar <[EMAIL PROTECTED]> CommitDate: Fri Nov 9 22:39:39 2007 +0100
sched: avoid large irq-latencies in smp-balancing SMP balancing is done with IRQs disabled and can iterate the full rq. When rqs are large this can cause large irq-latencies. Limit the nr of iterations on each run. This fixes a scheduling latency regression reported by the -rt folks. Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]> Acked-by: Steven Rostedt <[EMAIL PROTECTED]> Tested-by: Gregory Haskins <[EMAIL PROTECTED]> Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]> --- include/linux/sched.h | 1 + kernel/sched.c | 15 ++++++++++----- kernel/sysctl.c | 8 ++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 93fd30d..2cc789f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; +extern unsigned int sysctl_sched_nr_migrate; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, diff --git a/kernel/sched.c b/kernel/sched.c index 2a107e4..e195a42 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -472,6 +472,12 @@ const_debug unsigned int sysctl_sched_features = #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) /* + * Number of tasks to iterate in a single balance run. + * Limited because this is done with IRQs disabled. + */ +const_debug unsigned int sysctl_sched_nr_migrate = 32; + +/* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): */ @@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, enum cpu_idle_type idle, int *all_pinned, int *this_best_prio, struct rq_iterator *iterator) { - int pulled = 0, pinned = 0, skip_for_load; + int loops = 0, pulled = 0, pinned = 0, skip_for_load; struct task_struct *p; long rem_load_move = max_load_move; @@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, */ p = iterator->start(iterator->arg); next: - if (!p) + if (!p || loops++ > sysctl_sched_nr_migrate) goto out; /* - * To help distribute high priority tasks accross CPUs we don't + * To help distribute high priority tasks across CPUs we don't * skip a task if it will be the highest priority task (i.e. smallest * prio value) on its new queue regardless of its load weight */ @@ -2269,8 +2275,7 @@ next: rem_load_move -= p->se.load.weight; /* - * We only want to steal up to the prescribed number of tasks - * and the prescribed amount of weighted load. + * We only want to steal up to the prescribed amount of weighted load. */ if (rem_load_move > 0) { if (p->prio < *this_best_prio) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index adddf68..3a1744f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_nr_migrate", + .data = &sysctl_sched_nr_migrate, + .maxlen = sizeof(unsigned int), + .mode = 644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = CTL_UNNUMBERED, - To unsubscribe from this list: send the line "unsubscribe git-commits-head" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html