Re: [PATCH v2 2/2] mm: add sysctl to pick vmstat monitor cpu
On Wed, 19 Jun 2013, Gilad Ben-Yossef wrote: > Add a sysctl knob to enable admin to hand pick the scapegoat cpu > that will perform the extra work of periodically checking for > new VM activity on CPUs that have switched off their vmstat_update > work item scheduling. Not necessary if we use the dynticks sacrificial processor (boot cpu). Seems to be also used for RCU. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] mm: add sysctl to pick vmstat monitor cpu
On Wed, 19 Jun 2013, Gilad Ben-Yossef wrote: Add a sysctl knob to enable admin to hand pick the scapegoat cpu that will perform the extra work of periodically checking for new VM activity on CPUs that have switched off their vmstat_update work item scheduling. Not necessary if we use the dynticks sacrificial processor (boot cpu). Seems to be also used for RCU. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/2] mm: add sysctl to pick vmstat monitor cpu
Add a sysctl knob to enable admin to hand pick the scapegoat cpu that will perform the extra work of periodically checking for new VM activity on CPUs that have switched off their vmstat_update work item scheduling. Signed-off-by: Gilad Ben-Yossef <gi...@benyossef.com> CC: Christoph Lameter <c...@linux.com> CC: Paul E. McKenney <paul...@linux.vnet.ibm.com> CC: linux-kernel@vger.kernel.org CC: linux...@kvack.org --- include/linux/vmstat.h |1 + kernel/sysctl.c|7 mm/vmstat.c| 72 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index a30ab79..470f1d0 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> extern int sysctl_stat_interval; +extern int sysctl_vmstat_monitor_cpu; #ifdef CONFIG_VM_EVENT_COUNTERS /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf45..58c889e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1361,6 +1361,13 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "stat_monitor_cpu", + .data = &sysctl_vmstat_monitor_cpu, + .maxlen = sizeof(sysctl_vmstat_monitor_cpu), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_MMU { diff --git a/mm/vmstat.c b/mm/vmstat.c index 6143c70..767412e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1187,7 +1187,7 @@ static const struct file_operations proc_vmstat_file_operations = { static DEFINE_PER_CPU(struct delayed_work, vmstat_work); int sysctl_stat_interval __read_mostly = HZ; static struct cpumask vmstat_cpus; -static int vmstat_monitor_cpu __read_mostly = VMSTAT_NO_CPU; +int sysctl_vmstat_monitor_cpu __read_mostly = VMSTAT_NO_CPU; static inline bool need_vmstat(int cpu) { @@ -1232,12 +1232,13 @@ static void vmstat_update(struct work_struct *w) { int cpu, this_cpu = smp_processor_id(); - if (unlikely(this_cpu == vmstat_monitor_cpu)) + if (unlikely(this_cpu == sysctl_vmstat_monitor_cpu)) for_each_cpu_not(cpu, &vmstat_cpus) if (need_vmstat(cpu)) start_cpu_timer(cpu); - if 
(likely(refresh_cpu_vm_stats(this_cpu) || (this_cpu == vmstat_monitor_cpu))) + if (likely(refresh_cpu_vm_stats(this_cpu) || + (this_cpu == sysctl_vmstat_monitor_cpu))) schedule_delayed_work(&__get_cpu_var(vmstat_work), round_jiffies_relative(sysctl_stat_interval)); else @@ -1266,9 +1267,9 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, if (cpumask_test_cpu(cpu, &vmstat_cpus)) { cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); per_cpu(vmstat_work, cpu).work.func = NULL; - if(cpu == vmstat_monitor_cpu) { + if (cpu == sysctl_vmstat_monitor_cpu) { int this_cpu = smp_processor_id(); - vmstat_monitor_cpu = this_cpu; + sysctl_vmstat_monitor_cpu = this_cpu; if (!cpumask_test_cpu(this_cpu, &vmstat_cpus)) start_cpu_timer(this_cpu); } @@ -1299,7 +1300,7 @@ static int __init setup_vmstat(void) register_cpu_notifier(&vmstat_notifier); - vmstat_monitor_cpu = smp_processor_id(); + sysctl_vmstat_monitor_cpu = smp_processor_id(); for_each_online_cpu(cpu) setup_cpu_timer(cpu); @@ -1474,5 +1475,64 @@ fail: return -ENOMEM; } +#ifdef CONFIG_SYSCTL +/* + * proc handler for /proc/sys/mm/stat_monitor_cpu + * + * Note that there is a harmless race condition here: + * If you concurrently try to change the monitor CPU to + * a new valid one and an invalid (offline) one at the + * same time, you can get a success indication for the + * valid one, a failure for the invalid one, but end up + * with the old value. It's easily fixable but hardly + * worth the added complexity. + */ + +int proc_monitor_cpu(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + int tmp; + + /* +* We need to make sure the chosen and old monitor cpus don't +* go offline on us during the transition. +*/ + get_online_cpus(); + + tmp = sysctl_vmstat_monitor_cpu; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (ret || !write) + goto out; + + /* +* An offline CPU is a bad choice for monitoring duty. +* Abort. 
+*/ + if (!cpu_online(sysctl_vmstat_monitor_cpu)) { + sysctl_vmstat_monitor_cpu
[PATCH v2 2/2] mm: add sysctl to pick vmstat monitor cpu
Add a sysctl knob to enable admin to hand pick the scapegoat cpu that will perform the extra work of periodically checking for new VM activity on CPUs that have switched off their vmstat_update work item scheduling. Signed-off-by: Gilad Ben-Yossef <gi...@benyossef.com> CC: Christoph Lameter <c...@linux.com> CC: Paul E. McKenney <paul...@linux.vnet.ibm.com> CC: linux-kernel@vger.kernel.org CC: linux...@kvack.org --- include/linux/vmstat.h |1 + kernel/sysctl.c|7 mm/vmstat.c| 72 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index a30ab79..470f1d0 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> extern int sysctl_stat_interval; +extern int sysctl_vmstat_monitor_cpu; #ifdef CONFIG_VM_EVENT_COUNTERS /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf45..58c889e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1361,6 +1361,13 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "stat_monitor_cpu", + .data = &sysctl_vmstat_monitor_cpu, + .maxlen = sizeof(sysctl_vmstat_monitor_cpu), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #endif #ifdef CONFIG_MMU { diff --git a/mm/vmstat.c b/mm/vmstat.c index 6143c70..767412e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1187,7 +1187,7 @@ static const struct file_operations proc_vmstat_file_operations = { static DEFINE_PER_CPU(struct delayed_work, vmstat_work); int sysctl_stat_interval __read_mostly = HZ; static struct cpumask vmstat_cpus; -static int vmstat_monitor_cpu __read_mostly = VMSTAT_NO_CPU; +int sysctl_vmstat_monitor_cpu __read_mostly = VMSTAT_NO_CPU; static inline bool need_vmstat(int cpu) { @@ -1232,12 +1232,13 @@ static void vmstat_update(struct work_struct *w) { int cpu, this_cpu = smp_processor_id(); - if (unlikely(this_cpu == vmstat_monitor_cpu)) + if (unlikely(this_cpu == sysctl_vmstat_monitor_cpu)) for_each_cpu_not(cpu, 
vmstat_cpus) if (need_vmstat(cpu)) start_cpu_timer(cpu); - if (likely(refresh_cpu_vm_stats(this_cpu) || (this_cpu == vmstat_monitor_cpu))) + if (likely(refresh_cpu_vm_stats(this_cpu) || + (this_cpu == sysctl_vmstat_monitor_cpu))) schedule_delayed_work(__get_cpu_var(vmstat_work), round_jiffies_relative(sysctl_stat_interval)); else @@ -1266,9 +1267,9 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, if (cpumask_test_cpu(cpu, vmstat_cpus)) { cancel_delayed_work_sync(per_cpu(vmstat_work, cpu)); per_cpu(vmstat_work, cpu).work.func = NULL; - if(cpu == vmstat_monitor_cpu) { + if (cpu == sysctl_vmstat_monitor_cpu) { int this_cpu = smp_processor_id(); - vmstat_monitor_cpu = this_cpu; + sysctl_vmstat_monitor_cpu = this_cpu; if (!cpumask_test_cpu(this_cpu, vmstat_cpus)) start_cpu_timer(this_cpu); } @@ -1299,7 +1300,7 @@ static int __init setup_vmstat(void) register_cpu_notifier(vmstat_notifier); - vmstat_monitor_cpu = smp_processor_id(); + sysctl_vmstat_monitor_cpu = smp_processor_id(); for_each_online_cpu(cpu) setup_cpu_timer(cpu); @@ -1474,5 +1475,64 @@ fail: return -ENOMEM; } +#ifdef CONFIG_SYSCTL +/* + * proc handler for /proc/sys/mm/stat_monitor_cpu + * + * Note that there is a harmless race condition here: + * If you concurrently try to change the monitor CPU to + * a new valid one and an invalid (offline) one at the + * same time, you can get a success indication for the + * valid one, a failure for the invalid one, but end up + * with the old value. It's easily fixable but hardly + * worth the added complexity. + */ + +int proc_monitor_cpu(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + int tmp; + + /* +* We need to make sure the chosen and old monitor cpus don't +* go offline on us during the transition. 
+*/ + get_online_cpus(); + + tmp = sysctl_vmstat_monitor_cpu; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (ret || !write) + goto out; + + /* +* An offline CPU is a bad choice for monitoring duty. +* Abort. +