On Wed, Jun 27, 2018 at 11:11:06AM +0200, Peter Zijlstra wrote:
> On Tue, Jun 26, 2018 at 04:40:04PM -0700, Paul E. McKenney wrote:
> > The options I have considered are as follows:
> 
> > 2.  Stick with the no-failsafe approach, but rely on RCU's grace-period
> >     kthread to wake up later due to its timed wait during the
> >     force-quiescent-state process.  This would be a bit obnoxious,
> >     as it requires passing a don't-wake flag (or some such) up the
> >     quiescent-state reporting mechanism.  It would also needlessly
> >     delay grace-period ends, especially on large systems (RCU scales
> >     up the FQS delay on larger systems to maintain limited CPU
> >     consumption per unit time).
> > 
> > 3.  Stick with the no-failsafe approach, but have the quiescent-state
> >     reporting code hand back a value indicating that a wakeup is needed.
> >     Also a bit obnoxious, as this value would need to be threaded up
> >     the reporting code's return path.  Simple in theory, but a bit
> >     of an ugly change, especially for the many places in the code that
> >     currently expect quiescent-state reporting to be an unconditional
> >     fire-and-forget operation.
> 
> Here's a variant on 2+3, instead of propagating the state back, we
> completely ignore if we needed a wakeup or not, and then unconditionally
> wake the GP kthread on the managing CPU's rcutree_migrate_callbacks()
> invocation.
> 
> Hotplug is rare (or should damn well be), doing a spurious wake of the
> GP thread shouldn't matter here.

Another variant, which simply skips the wakeup whenever it runs on an offline
CPU, relying on the wakeup from rcutree_migrate_callbacks() right after
the CPU really is dead.

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7832dd556490..417496a03259 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -104,7 +104,6 @@ struct rcu_state sname##_state = { \
        .abbr = sabbr, \
        .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
        .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
-       .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
 }
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
@@ -1928,13 +1927,11 @@ static bool rcu_gp_init(struct rcu_state *rsp)
         */
        rsp->gp_state = RCU_GP_ONOFF;
        rcu_for_each_leaf_node(rsp, rnp) {
-               spin_lock(&rsp->ofl_lock);
                raw_spin_lock_irq_rcu_node(rnp);
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
                    !rnp->wait_blkd_tasks) {
                        /* Nothing to do on this leaf rcu_node structure. */
                        raw_spin_unlock_irq_rcu_node(rnp);
-                       spin_unlock(&rsp->ofl_lock);
                        continue;
                }
 
@@ -1970,7 +1967,6 @@ static bool rcu_gp_init(struct rcu_state *rsp)
                }
 
                raw_spin_unlock_irq_rcu_node(rnp);
-               spin_unlock(&rsp->ofl_lock);
        }
        rcu_gp_slow(rsp, gp_preinit_delay); /* Races with CPU hotplug. */
 
@@ -2250,11 +2246,19 @@ static int __noreturn rcu_gp_kthread(void *arg)
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
        __releases(rcu_get_root(rsp)->lock)
 {
+       int cpu = smp_processor_id();
+
        raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp));
        WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
        WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
        raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-       rcu_gp_kthread_wake(rsp);
+
+       /*
+        * Skip the wakeup when our @cpu is offline; the GP kthread then gets
+        * its wakeup from rcutree_migrate_callbacks().
+        */
+       if (cpu_online(cpu))
+               rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -3768,18 +3772,15 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
 
        /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
        mask = rdp->grpmask;
-       spin_lock(&rsp->ofl_lock);
        raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
        rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq);
        rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags);
+       rnp->qsmaskinitnext &= ~mask;
        if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
-               /* Report quiescent state -before- changing ->qsmaskinitnext! */
                rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
        }
-       rnp->qsmaskinitnext &= ~mask;
        raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-       spin_unlock(&rsp->ofl_lock);
 }
 
 /*
@@ -3849,6 +3850,13 @@ void rcutree_migrate_callbacks(int cpu)
 {
        struct rcu_state *rsp;
 
+       /*
+        * Just in case the outgoing CPU needed to wake the GP kthread,
+        * do so here, once per flavor: @rsp is only set by the iterator.
+        */
+       for_each_rcu_flavor(rsp)
+               rcu_gp_kthread_wake(rsp);
+
        for_each_rcu_flavor(rsp)
                rcu_migrate_callbacks(cpu, rsp);
 }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4e74df768c57..8dab71838141 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -367,10 +367,6 @@ struct rcu_state {
        const char *name;                       /* Name of structure. */
        char abbr;                              /* Abbreviated name. */
        struct list_head flavors;               /* List of RCU flavors. */
-
-       spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
-                                               /* Synchronize offline with */
-                                               /*  GP pre-initialization. */
 };
 
 /* Values for rcu_state structure's gp_flags field. */

Reply via email to