Expedite synchronize_rcu() during SMT mode switches initiated via the
/sys/devices/system/cpu/smt/control interface

SMT mode switches, e.g. from SMT 8 to SMT 1 or vice versa, are user-driven
operations and should therefore complete as quickly as possible. Switching
SMT states involves iterating over a list of CPUs and performing hotplug
operations on each of them. These transitions were found to take a
significantly long time to complete, particularly on high-core-count
systems, because the system was blocked in synchronize_rcu() calls.

Below is one of the call stacks that accounted for most of the blocking
time, as reported by the offcputime bcc script for a CPU offline
operation:

    finish_task_switch
    __schedule
    schedule
    schedule_timeout
    wait_for_completion
    __wait_rcu_gp
    synchronize_rcu
    cpuidle_uninstall_idle_handler
    powernv_cpuidle_cpu_dead
    cpuhp_invoke_callback
    __cpuhp_invoke_callback_range
    _cpu_down
    cpu_device_down
    cpu_subsys_offline
    device_offline
    online_store
    dev_attr_store
    sysfs_kf_write
    kernfs_fop_write_iter
    vfs_write
    ksys_write
    system_call_exception
    system_call_common
   -                bash (29705)
        5771569  ------------------------>  Duration (us)

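Address this by bracketing the batch hotplug operation in
cpuhp_smt_disable() and cpuhp_smt_enable() with rcu_expedite_gp() and
rcu_unexpedite_gp(), so that synchronize_rcu() calls issued from the
hotplug callbacks complete via expedited grace periods for the duration
of the switch. A simplified sketch of the pattern (mirrors the hunks
below; error handling omitted):

    cpu_maps_update_begin();
    rcu_expedite_gp();          /* temporarily force expedited grace periods */
    cpus_write_lock();
    for_each_online_cpu(cpu) {
            /* per-CPU hotplug work; synchronize_rcu() in the hotplug
             * callbacks now completes via the expedited path */
    }
    cpus_write_unlock();
    rcu_unexpedite_gp();        /* restore normal grace-period behavior */
    cpu_maps_update_done();
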
Signed-off-by: Vishal Chourasia <[email protected]>
---
 include/linux/rcupdate.h | 3 +++
 kernel/cpu.c             | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7729fef249e1..f12d0d0f008d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1190,6 +1190,9 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
 extern int rcu_expedited;
 extern int rcu_normal;
 
+extern void rcu_expedite_gp(void);
+extern void rcu_unexpedite_gp(void);
+
 DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock())
 DECLARE_LOCK_GUARD_0_ATTRS(rcu, __acquires_shared(RCU), __releases_shared(RCU))
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index edaa37419036..f5517d64d3f3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2683,6 +2683,7 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
                ret = -EBUSY;
                goto out;
        }
+       rcu_expedite_gp();
        /* Hold cpus_write_lock() for entire batch operation. */
        cpus_write_lock();
        for_each_online_cpu(cpu) {
@@ -2715,6 +2716,7 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
        if (!ret)
                cpu_smt_control = ctrlval;
        cpus_write_unlock();
+       rcu_unexpedite_gp();
        arch_smt_update();
 out:
        cpu_maps_update_done();
@@ -2734,6 +2736,7 @@ int cpuhp_smt_enable(void)
        int cpu, ret = 0;
 
        cpu_maps_update_begin();
+       rcu_expedite_gp();
        /* Hold cpus_write_lock() for entire batch operation. */
        cpus_write_lock();
        cpu_smt_control = CPU_SMT_ENABLED;
@@ -2750,6 +2753,7 @@ int cpuhp_smt_enable(void)
                cpuhp_online_cpu_device(cpu);
        }
        cpus_write_unlock();
+       rcu_unexpedite_gp();
        arch_smt_update();
        cpu_maps_update_done();
        return ret;
-- 
2.53.0

