Expedite synchronize_rcu during SMT mode switch operations initiated
via the /sys/devices/system/cpu/smt/control interface.
SMT mode switch operations, i.e. switching between SMT 8 and SMT 1 or
other SMT states, are user-driven operations and therefore should
complete as soon as possible. Switching SMT states involves iterating
over a list of CPUs and performing hotplug operations. It was found
that these transitions took a significantly long time to complete,
particularly on high-core-count systems, because the system was blocked
on synchronize_rcu calls.
Below is one of the call stacks that accounted for most of the blocking
time overhead, as reported by the offcputime bcc script for a CPU
offline operation:
finish_task_switch
__schedule
schedule
schedule_timeout
wait_for_completion
__wait_rcu_gp
synchronize_rcu
cpuidle_uninstall_idle_handler
powernv_cpuidle_cpu_dead
cpuhp_invoke_callback
__cpuhp_invoke_callback_range
_cpu_down
cpu_device_down
cpu_subsys_offline
device_offline
online_store
dev_attr_store
sysfs_kf_write
kernfs_fop_write_iter
vfs_write
ksys_write
system_call_exception
system_call_common
- bash (29705)
5771569 ------------------------> Duration (us)
Signed-off-by: Vishal Chourasia <[email protected]>
---
include/linux/rcupdate.h | 3 +++
kernel/cpu.c | 4 ++++
2 files changed, 7 insertions(+)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7729fef249e1..f12d0d0f008d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1190,6 +1190,9 @@ rcu_head_after_call_rcu(struct rcu_head *rhp,
rcu_callback_t f)
extern int rcu_expedited;
extern int rcu_normal;
+extern void rcu_expedite_gp(void);
+extern void rcu_unexpedite_gp(void);
+
DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index edaa37419036..f5517d64d3f3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2683,6 +2683,7 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
ret = -EBUSY;
goto out;
}
+ rcu_expedite_gp();
/* Hold cpus_write_lock() for entire batch operation. */
cpus_write_lock();
for_each_online_cpu(cpu) {
@@ -2715,6 +2716,7 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
if (!ret)
cpu_smt_control = ctrlval;
cpus_write_unlock();
+ rcu_unexpedite_gp();
arch_smt_update();
out:
cpu_maps_update_done();
@@ -2734,6 +2736,7 @@ int cpuhp_smt_enable(void)
int cpu, ret = 0;
cpu_maps_update_begin();
+ rcu_expedite_gp();
/* Hold cpus_write_lock() for entire batch operation. */
cpus_write_lock();
cpu_smt_control = CPU_SMT_ENABLED;
@@ -2750,6 +2753,7 @@ int cpuhp_smt_enable(void)
cpuhp_online_cpu_device(cpu);
}
cpus_write_unlock();
+ rcu_unexpedite_gp();
arch_smt_update();
cpu_maps_update_done();
return ret;
--
2.53.0