This patch introduces sysctls for the sched_domain-based migration costs. These can in turn be used for performance tuning of workloads.
Signed-off-by: Rohit Jain <rohit.k.j...@oracle.com>
---
 include/linux/sched/sysctl.h |  2 ++
 kernel/sched/fair.c          |  4 +++-
 kernel/sched/topology.c      |  8 ++++----
 kernel/sysctl.c              | 14 ++++++++++++++
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1c1a151..d597f6c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -39,6 +39,8 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
+extern __read_mostly unsigned int sysctl_sched_core_migration_cost;
+extern __read_mostly unsigned int sysctl_sched_thread_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
 extern __read_mostly unsigned int sysctl_sched_time_avg;

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 61d3508..f395adc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -99,7 +99,9 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
 unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
-const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+const_debug unsigned int sysctl_sched_core_migration_cost = 500000UL;
+const_debug unsigned int sysctl_sched_thread_migration_cost = 0UL;
 
 #ifdef CONFIG_SMP
 /*

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index bcd8c64..fc147db 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1148,14 +1148,14 @@ sd_init(struct sched_domain_topology_level *tl,
 		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
-		sd->sched_migration_cost = 0;
+		sd->sched_migration_cost = sysctl_sched_thread_migration_cost;
 
 	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
 		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 117;
 		sd->cache_nice_tries = 1;
 		sd->busy_idx = 2;
-		sd->sched_migration_cost = 500000UL;
+		sd->sched_migration_cost = sysctl_sched_core_migration_cost;
 
 #ifdef CONFIG_NUMA
 	} else if (sd->flags & SD_NUMA) {
@@ -1164,7 +1164,7 @@ sd_init(struct sched_domain_topology_level *tl,
 		sd->idle_idx = 2;
 		sd->flags |= SD_SERIALIZE;
-		sd->sched_migration_cost = sysctl_sched_migration_cost;
+		sd->sched_migration_cost = sysctl_sched_migration_cost;
 		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
 			sd->flags &= ~(SD_BALANCE_EXEC | SD_BALANCE_FORK |
@@ -1177,7 +1177,7 @@ sd_init(struct sched_domain_topology_level *tl,
 		sd->cache_nice_tries = 1;
 		sd->busy_idx = 2;
 		sd->idle_idx = 1;
-		sd->sched_migration_cost = 5000000UL;
+		sd->sched_migration_cost = sysctl_sched_migration_cost;
 	}
 
 /*

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 557d467..0920795 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -356,6 +356,20 @@ static struct ctl_table kern_table[] = {
 		.proc_handler = proc_dointvec,
 	},
 	{
+		.procname = "sched_core_migration_cost_ns",
+		.data = &sysctl_sched_core_migration_cost,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "sched_thread_migration_cost_ns",
+		.data = &sysctl_sched_thread_migration_cost,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
 		.procname = "sched_nr_migrate",
 		.data = &sysctl_sched_nr_migrate,
 		.maxlen = sizeof(unsigned int),
-- 
2.7.4