sched: reintroduce SMP tunings again

Linux Kernel Mailing List Fri, 09 Nov 2007 17:06:52 -0800

Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=19978ca610946ed57c071bad63f8f6642ca1298b
Commit:     19978ca610946ed57c071bad63f8f6642ca1298b
Parent:     fa13a5a1f25f671d084d8884be96fc48d9b68275
Author:     Ingo Molnar <[EMAIL PROTECTED]>
AuthorDate: Fri Nov 9 22:39:38 2007 +0100
Committer:  Ingo Molnar <[EMAIL PROTECTED]>
CommitDate: Fri Nov 9 22:39:38 2007 +0100


    sched: reintroduce SMP tunings again
    
    Yanmin Zhang reported an aim7 regression and bisected it down to:
    
     |  commit 38ad464d410dadceda1563f36bdb0be7fe4c8938
     |  Author: Ingo Molnar <[EMAIL PROTECTED]>
     |  Date:   Mon Oct 15 17:00:02 2007 +0200
     |
     |     sched: uniform tunings
     |
     |     use the same defaults on both UP and SMP.
    
    fix this by reintroducing similar SMP tunings again. This resolves
    the regression.
    
    (also update the comments to match the ilog2(nr_cpus) tuning effect)
    
    Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
---
 kernel/sched.c      |   28 ++++++++++++++++++++++++++++
 kernel/sched_fair.c |   18 +++++++++---------
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 3f6bd11..69cae27 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4992,6 +4992,32 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
+/*
+ * Increase the granularity value when there are more CPUs,
+ * because with more CPUs the 'effective latency' as visible
+ * to users decreases. But the relationship is not linear,
+ * so pick a second-best guess by going with the log2 of the
+ * number of CPUs.
+ *
+ * This idea comes from the SD scheduler of Con Kolivas:
+ */
+static inline void sched_init_granularity(void)
+{
+       unsigned int factor = 1 + ilog2(num_online_cpus());
+       const unsigned long limit = 200000000;
+
+       sysctl_sched_min_granularity *= factor;
+       if (sysctl_sched_min_granularity > limit)
+               sysctl_sched_min_granularity = limit;
+
+       sysctl_sched_latency *= factor;
+       if (sysctl_sched_latency > limit)
+               sysctl_sched_latency = limit;
+
+       sysctl_sched_wakeup_granularity *= factor;
+       sysctl_sched_batch_wakeup_granularity *= factor;
+}
+
 #ifdef CONFIG_SMP
 /*
  * This is how migration works:
@@ -6688,10 +6714,12 @@ void __init sched_init_smp(void)
        /* Move init over to a non-isolated CPU */
        if (set_cpus_allowed(current, non_isolated_cpus) < 0)
                BUG();
+       sched_init_granularity();
 }
 #else
 void __init sched_init_smp(void)
 {
+       sched_init_granularity();
 }
 #endif /* CONFIG_SMP */
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c495dcf..7264814 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -22,7 +22,7 @@
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
- * (default: 20ms, units: nanoseconds)
+ * (default: 20ms * ilog(ncpus), units: nanoseconds)
  *
  * NOTE: this latency value is not the same as the concept of
  * 'timeslice length' - timeslices in CFS are of variable length
@@ -32,18 +32,18 @@
  * (to see the precise effective timeslice length of your workload,
  *  run vmstat and monitor the context-switches (cs) field)
  */
-const_debug unsigned int sysctl_sched_latency = 20000000ULL;
+unsigned int sysctl_sched_latency = 20000000ULL;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
- * (default: 1 msec, units: nanoseconds)
+ * (default: 1 msec * ilog(ncpus), units: nanoseconds)
  */
-const_debug unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int sysctl_sched_min_granularity = 1000000ULL;
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
  */
-const_debug unsigned int sched_nr_latency = 20;
+unsigned int sched_nr_latency = 20;
 
 /*
  * After fork, child runs first. (default) If set to 0 then
@@ -61,23 +61,23 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 10 msec * ilog(ncpus), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 10 msec * ilog(ncpus), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

sched: reintroduce SMP tunings again

Reply via email to