The patch titled
sched: HT optimisation
has been added to the -mm tree. Its filename is
sched-ht-optimisation.patch
Patches currently in -mm which might be from [EMAIL PROTECTED] are
ia64-cpuset-build_sched_domains-mangles-structures.patch
mm-comment-rmap.patch
mm-micro-optimise-rmap.patch
mm-cleanup-rmap.patch
mm-remap-zero_page-mappings.patch
mm-remove-atomic.patch
sched-idlest-cpus_allowed-aware.patch
sched-implement-nice-support-across-physical-cpus-on-smp.patch
sched-change_prio_bias_only_if_queued.patch
sched-account_rt_tasks_in_prio_bias.patch
sched-less-newidle-locking.patch
sched-less-locking.patch
sched-ht-optimisation.patch
sched-consider-migration-thread-with-smp-nice.patch
sched2-sched-domain-sysctl.patch
From: Nick Piggin <[EMAIL PROTECTED]>
If an idle sibling of an HT queue encounters a busy sibling, then perform
higher-level load balancing as the non-idle variety (i.e. as if this CPU
were not idle).
Performance of multiprocessor HT systems with low numbers of tasks
(generally < number of virtual CPUs) can be significantly worse than the
exact same workloads when running in non-HT mode. This is largely due to
poor scheduling behaviour.
This patch improves the situation, making the performance gap far less
significant on one problematic test case (tbench).
Signed-off-by: Nick Piggin <[EMAIL PROTECTED]>
Acked-by: Ingo Molnar <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
---
kernel/sched.c | 34 ++++++++++++++++++++++++++++------
1 files changed, 28 insertions(+), 6 deletions(-)
diff -puN kernel/sched.c~sched-ht-optimisation kernel/sched.c
--- devel/kernel/sched.c~sched-ht-optimisation 2005-08-29 23:36:14.000000000 -0700
+++ devel-akpm/kernel/sched.c 2005-08-29 23:36:14.000000000 -0700
@@ -1980,7 +1980,7 @@ out:
*/
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
- unsigned long *imbalance, enum idle_type idle)
+ unsigned long *imbalance, enum idle_type idle, int *sd_idle)
{
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -2005,6 +2005,9 @@ find_busiest_group(struct sched_domain *
avg_load = 0;
for_each_cpu_mask(i, group->cpumask) {
+ if (*sd_idle && !idle_cpu(i))
+ *sd_idle = 0;
+
/* Bias balancing toward cpus of our domain */
if (local_group)
load = __target_load(i, load_idx, idle);
@@ -2149,10 +2152,14 @@ static int load_balance(int this_cpu, ru
unsigned long imbalance;
int nr_moved, all_pinned = 0;
int active_balance = 0;
+ int sd_idle = 0;
+
+ if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER)
+ sd_idle = 1;
schedstat_inc(sd, lb_cnt[idle]);
- group = find_busiest_group(sd, this_cpu, &imbalance, idle);
+ group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle);
if (!group) {
schedstat_inc(sd, lb_nobusyg[idle]);
goto out_balanced;
@@ -2225,6 +2232,8 @@ static int load_balance(int this_cpu, ru
sd->balance_interval *= 2;
}
+ if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+ return -1;
return nr_moved;
out_balanced:
@@ -2236,6 +2245,8 @@ out_balanced:
(sd->balance_interval < sd->max_interval))
sd->balance_interval *= 2;
+ if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+ return -1;
return 0;
}
@@ -2253,9 +2264,13 @@ static int load_balance_newidle(int this
runqueue_t *busiest = NULL;
unsigned long imbalance;
int nr_moved = 0;
+ int sd_idle = 0;
+ if (sd->flags & SD_SHARE_CPUPOWER)
+ sd_idle = 1;
+
schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
- group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
+ group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, &sd_idle);
if (!group) {
schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
goto out_balanced;
@@ -2280,15 +2295,19 @@ static int load_balance_newidle(int this
spin_unlock(&busiest->lock);
}
- if (!nr_moved)
+ if (!nr_moved) {
schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
- else
+ if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+ return -1;
+ } else
sd->nr_balance_failed = 0;
return nr_moved;
out_balanced:
schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
+ if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER)
+ return -1;
sd->nr_balance_failed = 0;
return 0;
}
@@ -2413,7 +2432,10 @@ static void rebalance_tick(int this_cpu,
if (j - sd->last_balance >= interval) {
if (load_balance(this_cpu, this_rq, sd, idle)) {
- /* We've pulled tasks over so no longer idle */
+ /* We've pulled tasks over so either we're no
+ * longer idle, or one of our SMT siblings is
+ * not idle.
+ */
idle = NOT_IDLE;
}
sd->last_balance += interval;
_
-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html