On Tue, Sep 05, 2017 at 03:36:11PM +0200, Thomas Gleixner wrote:
> On Tue, 5 Sep 2017, Thomas Gleixner wrote:
> > On Tue, 5 Sep 2017, Peter Zijlstra wrote:
> > 
> > > These two patches appear to make hotplug work again without tripping 
> > > lockdep.
> > 
> > They cover the case where the plug/unplug succeeds, but they will not work
> > when a plug/unplug operation fails, because after a fail it rolls back
> > automatically, so in case UP fails, it will go down again, but the
> > initiator side still waits on the 'UP' completion. Same issue on down.
> > 
> > I think that extra lockdep magic can be avoided completely by splitting the
> > completions into a 'up' and a 'down' completion, but that only solves a
> > part of the problem. The current failure handling does an automated
> > rollback, so if UP fails somewhere the AP rolls back, which means it
> > invokes the down callbacks. DOWN the other way round.
> > 
> > > We can solve that by changing how rollback is handled so that it does
> > > not roll back automatically.
> > 
> >     if (callback() < 0) {
> >        store_state();
> >        complete(UP);
> >        wait_for_being_kicked_again()
> >     }
> > 
> > and on the control side have
> > 
> >     wait_for_completion(UP);
> > 
> >     if (UP->failed) {
> >             kick(DOWN);
> > >             wait_for_completion(DOWN);
> >     }
> > 
> > It's not entirely trivial, but I haven't seen a real problem with it yet.
> 
> Now I found one. It's the multi instance rollback. This is a nested
> rollback mechanism deep in the call chain. Separating that one is going to
> be a major pain.

Yes, *ouch*.. Does something like the below look like something in the
right direction to you?

It appears to boot and to survive an offline/online cycle of a CPU. But
this might just be a fluke; this is tricky stuff.

Of course, the rollback is 100% untested... because that doesn't
normally trigger. I'll have to write some kernel modules for that. And,
as we discussed, the failure during rollback is 'interesting' and I
simply BUG() on that for now.

---
 kernel/cpu.c | 335 +++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 232 insertions(+), 103 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index acf5308fad51..02edb0f1d786 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -46,7 +46,8 @@
  * @bringup:   Single callback bringup or teardown selector
  * @cb_state:  The state for a single callback (install/uninstall)
  * @result:    Result of the operation
- * @done:      Signal completion to the issuer of the task
+ * @done_up:   Signal completion to the issuer of the task for cpu-up
+ * @done_down: Signal completion to the issuer of the task for cpu-down
  */
 struct cpuhp_cpu_state {
        enum cpuhp_state        state;
@@ -58,20 +59,51 @@ struct cpuhp_cpu_state {
        bool                    single;
        bool                    bringup;
        struct hlist_node       *node;
+       struct hlist_node       *last;
        enum cpuhp_state        cb_state;
        int                     result;
-       struct completion       done;
+       struct completion       done_up;
+       struct completion       done_down;
 #endif
 };
 
 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
 
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
-static struct lockdep_map cpuhp_state_lock_map =
-       STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+static struct lockdep_map cpuhp_state_up_map =
+       STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
+static struct lockdep_map cpuhp_state_down_map =
+       STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
+
+
+static void inline cpuhp_lock_acquire(bool bringup)
+{
+       lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+
+static void inline cpuhp_lock_release(bool bringup)
+{
+       lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+#else
+
+static void inline cpuhp_lock_acquire(bool bringup) { }
+static void inline cpuhp_lock_release(bool bringup) { }
+
 #endif
 
+static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+       struct completion *done = bringup ? &st->done_up : &st->done_down;
+       wait_for_completion(done);
+}
+
+static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+       struct completion *done = bringup ? &st->done_up : &st->done_down;
+       complete(done);
+}
+
 /**
  * cpuhp_step - Hotplug state machine step
  * @name:      Name of the step
@@ -129,7 +161,8 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state 
state)
  * Called from cpu hotplug and from the state register machinery.
  */
 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
-                                bool bringup, struct hlist_node *node)
+                                bool bringup, struct hlist_node *node,
+                                struct hlist_node **lastp)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        struct cpuhp_step *step = cpuhp_get_step(state);
@@ -138,6 +171,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum 
cpuhp_state state,
        int ret, cnt;
 
        if (!step->multi_instance) {
+               WARN_ON_ONCE(lastp && *lastp);
                cb = bringup ? step->startup.single : step->teardown.single;
                if (!cb)
                        return 0;
@@ -152,6 +186,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum 
cpuhp_state state,
 
        /* Single invocation for instance add/remove */
        if (node) {
+               WARN_ON_ONCE(lastp && *lastp);
                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
                ret = cbm(cpu, node);
                trace_cpuhp_exit(cpu, st->state, state, ret);
@@ -161,14 +196,26 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum 
cpuhp_state state,
        /* State transition. Invoke on all instances */
        cnt = 0;
        hlist_for_each(node, &step->list) {
+               if (lastp && node == *lastp)
+                       break;
+
                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
                ret = cbm(cpu, node);
                trace_cpuhp_exit(cpu, st->state, state, ret);
-               if (ret)
-                       goto err;
+
+               if (ret) {
+                       if (!lastp)
+                               goto err;
+
+                       *lastp = node;
+                       return ret;
+               }
                cnt++;
        }
+       if (lastp)
+               *lastp = NULL;
        return 0;
+
 err:
        /* Rollback the instances if one failed */
        cbm = !bringup ? step->startup.multi : step->teardown.multi;
@@ -271,14 +318,73 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+static inline enum cpuhp_state
+cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = st->state;
+
+       st->rollback = false;
+       st->last = NULL;
+       st->target = target;
+       st->bringup = st->state < target;
+
+       return prev_state;
+}
+
+static inline void
+cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+{
+       st->rollback = true;
+       /*
+        * Unless we have st->last set, we've failed to reach st->state
+        * and must start by undoing the previous state. If we have st->last
+        * we need to undo partial multi_instance of this state first.
+        */
+       if (!st->last)
+               st->state--;
+       st->target = prev_state;
+       st->bringup = st->state < prev_state;
+}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
+{
+       if (st->state == st->target)
+               return;
+
+       st->result = 0;
+       st->single = false;
+       /*
+        * Make sure the above stores are visible before should_run becomes
+        * true. Paired with the mb() above in cpuhp_thread_fun()
+        */
+       smp_mb();
+       st->should_run = true;
+       wake_up_process(st->thread);
+       wait_for_ap_thread(st, st->bringup);
+}
+
+static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = cpuhp_set_state(st, target);
+       int ret;
+
+       __cpuhp_kick_ap(st);
+
+       if ((ret = st->result)) {
+               cpuhp_reset_state(st, prev_state);
+               __cpuhp_kick_ap(st);
+       }
+
+       return ret;
+}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
        /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
-       wait_for_completion(&st->done);
+       wait_for_ap_thread(st, true);
        if (WARN_ON_ONCE((!cpu_online(cpu))))
                return -ECANCELED;
 
@@ -286,12 +392,10 @@ static int bringup_wait_for_ap(unsigned int cpu)
        stop_machine_unpark(cpu);
        kthread_unpark(st->thread);
 
-       /* Should we go further up ? */
-       if (st->target > CPUHP_AP_ONLINE_IDLE) {
-               __cpuhp_kick_ap_work(st);
-               wait_for_completion(&st->done);
-       }
-       return st->result;
+       if (st->target <= CPUHP_AP_ONLINE_IDLE)
+               return 0;
+
+       return cpuhp_kick_ap(st, st->target);
 }
 
 static int bringup_cpu(unsigned int cpu)
@@ -323,7 +427,7 @@ static void undo_cpu_down(unsigned int cpu, struct 
cpuhp_cpu_state *st)
                struct cpuhp_step *step = cpuhp_get_step(st->state);
 
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, true, NULL);
+                       cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
        }
 }
 
@@ -334,7 +438,7 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct 
cpuhp_cpu_state *st,
        int ret = 0;
 
        for (; st->state > target; st->state--) {
-               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
+               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
                if (ret) {
                        st->target = prev_state;
                        undo_cpu_down(cpu, st);
@@ -350,7 +454,7 @@ static void undo_cpu_up(unsigned int cpu, struct 
cpuhp_cpu_state *st)
                struct cpuhp_step *step = cpuhp_get_step(st->state);
 
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, false, NULL);
+                       cpuhp_invoke_callback(cpu, st->state, false, NULL, 
NULL);
        }
 }
 
@@ -362,7 +466,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct 
cpuhp_cpu_state *st,
 
        while (st->state < target) {
                st->state++;
-               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
+               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
                if (ret) {
                        st->target = prev_state;
                        undo_cpu_up(cpu, st);
@@ -379,7 +483,8 @@ static void cpuhp_create(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
-       init_completion(&st->done);
+       init_completion(&st->done_up);
+       init_completion(&st->done_down);
 }
 
 static int cpuhp_should_run(unsigned int cpu)
@@ -389,20 +494,6 @@ static int cpuhp_should_run(unsigned int cpu)
        return st->should_run;
 }
 
-/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
-static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
-
-       return cpuhp_down_callbacks(cpu, st, target);
-}
-
-/* Execute the online startup callbacks. Used to be CPU_ONLINE */
-static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       return cpuhp_up_callbacks(cpu, st, st->target);
-}
-
 /*
  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
  * callbacks when a state gets [un]installed at runtime.
@@ -410,48 +501,70 @@ static int cpuhp_ap_online(unsigned int cpu, struct 
cpuhp_cpu_state *st)
 static void cpuhp_thread_fun(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
-       int ret = 0;
+       bool bringup = st->bringup;
+       enum cpuhp_state state;
 
        /*
-        * Paired with the mb() in cpuhp_kick_ap_work and
-        * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+        * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
+        * that if we see ->should_run we also see the rest of the state.
         */
        smp_mb();
-       if (!st->should_run)
+
+       if (WARN_ON_ONCE(!st->should_run))
                return;
 
-       st->should_run = false;
+       cpuhp_lock_acquire(bringup);
 
-       lock_map_acquire(&cpuhp_state_lock_map);
-       /* Single callback invocation for [un]install ? */
        if (st->single) {
-               if (st->cb_state < CPUHP_AP_ONLINE) {
-                       local_irq_disable();
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                   st->bringup, st->node);
-                       local_irq_enable();
+               state = st->cb_state;
+               st->should_run = false;
+       } else {
+               if (bringup) {
+                       st->state++;
+                       st->should_run = (st->state < st->target);
+                       BUG_ON(st->state > st->target);
+                       state = st->state;
                } else {
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                   st->bringup, st->node);
+                       state = st->state;
+                       st->state--;
+                       st->should_run = (st->state >
+                                         max((int)st->target, 
CPUHP_TEARDOWN_CPU));
+                       BUG_ON(st->state < st->target);
                }
-       } else if (st->rollback) {
-               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+       }
 
-               undo_cpu_down(cpu, st);
-               st->rollback = false;
+//     BUG_ON(!cpuhp_is_ap_state(state));
+       WARN_ONCE(!cpuhp_is_ap_state(state), "invalid AP state: %d\n", state);
+
+       if (st->rollback) {
+               struct cpuhp_step *step = cpuhp_get_step(state);
+               if (step->skip_onerr)
+                       goto next;
+       }
+
+       if (state < CPUHP_AP_ONLINE) {
+               local_irq_disable();
+               st->result = cpuhp_invoke_callback(cpu, state, bringup, 
st->node, &st->last);
+               local_irq_enable();
        } else {
-               /* Cannot happen .... */
-               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
-
-               /* Regular hotplug work */
-               if (st->state < st->target)
-                       ret = cpuhp_ap_online(cpu, st);
-               else if (st->state > st->target)
-                       ret = cpuhp_ap_offline(cpu, st);
+               st->result = cpuhp_invoke_callback(cpu, state, bringup, 
st->node, &st->last);
+       }
+
+       if (st->result) {
+               /*
+                * If we fail on a rollback, we're up a creek without no
+                * paddle, no way forward, no way back. We loose, thanks for
+                * playing.
+                */
+               BUG_ON(st->rollback);
+               st->should_run = false;
        }
-       lock_map_release(&cpuhp_state_lock_map);
-       st->result = ret;
-       complete(&st->done);
+
+next:
+       cpuhp_lock_release(bringup);
+
+       if (!st->should_run)
+               complete_ap_thread(st, bringup);
 }
 
 /* Invoke a single callback on a remote cpu */
@@ -460,62 +573,75 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, 
bool bringup,
                         struct hlist_node *node)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       int ret;
 
        if (!cpu_online(cpu))
                return 0;
 
-       lock_map_acquire(&cpuhp_state_lock_map);
-       lock_map_release(&cpuhp_state_lock_map);
+       cpuhp_lock_acquire(false);
+       cpuhp_lock_release(false);
+
+       cpuhp_lock_acquire(true);
+       cpuhp_lock_release(true);
 
        /*
         * If we are up and running, use the hotplug thread. For early calls
         * we invoke the thread function directly.
         */
        if (!st->thread)
-               return cpuhp_invoke_callback(cpu, state, bringup, node);
+               return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 
-       st->cb_state = state;
+       st->rollback = false;
        st->single = true;
        st->bringup = bringup;
        st->node = node;
+       st->last = NULL;
+       st->cb_state = state;
+       st->result = 0;
 
        /*
-        * Make sure the above stores are visible before should_run becomes
-        * true. Paired with the mb() above in cpuhp_thread_fun()
+        * RELEASE - ensures the above stores are visible when should_run
+        * becomes true. Paired with the smp_mb() in cpuhp_thread_fun().
         */
        smp_mb();
        st->should_run = true;
        wake_up_process(st->thread);
-       wait_for_completion(&st->done);
-       return st->result;
-}
+       wait_for_ap_thread(st, bringup);
 
-/* Regular hotplug invocation of the AP hotplug thread */
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
-{
-       st->result = 0;
-       st->single = false;
        /*
-        * Make sure the above stores are visible before should_run becomes
-        * true. Paired with the mb() above in cpuhp_thread_fun()
+        * If we failed and did a partial, do a rollback.
         */
-       smp_mb();
-       st->should_run = true;
-       wake_up_process(st->thread);
+       if ((ret = st->result) && st->last) {
+               st->rollback = true;
+               st->bringup = !bringup;
+
+               smp_mb();
+               st->should_run = true;
+               wake_up_process(st->thread);
+               wait_for_ap_thread(st, !bringup);
+       }
+
+       return ret;
 }
 
 static int cpuhp_kick_ap_work(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-       enum cpuhp_state state = st->state;
+       enum cpuhp_state prev_state = st->state;
+       int ret;
+
+       trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
+
+       cpuhp_lock_acquire(false);
+       cpuhp_lock_release(false);
+
+       cpuhp_lock_acquire(true);
+       cpuhp_lock_release(true);
+
+       ret = cpuhp_kick_ap(st, st->target);
 
-       trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
-       lock_map_acquire(&cpuhp_state_lock_map);
-       lock_map_release(&cpuhp_state_lock_map);
-       __cpuhp_kick_ap_work(st);
-       wait_for_completion(&st->done);
-       trace_cpuhp_exit(cpu, st->state, state, st->result);
-       return st->result;
+       trace_cpuhp_exit(cpu, st->state, prev_state, ret);
+       return ret;
 }
 
 static struct smp_hotplug_thread cpuhp_threads = {
@@ -595,7 +721,7 @@ static int take_cpu_down(void *_param)
        st->state--;
        /* Invoke the former CPU_DYING callbacks */
        for (; st->state > target; st->state--)
-               cpuhp_invoke_callback(cpu, st->state, false, NULL);
+               cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 
        /* Give up timekeeping duties */
        tick_handover_do_timer();
@@ -639,7 +765,7 @@ static int takedown_cpu(unsigned int cpu)
         *
         * Wait for the stop thread to go away.
         */
-       wait_for_completion(&st->done);
+       wait_for_ap_thread(st, false);
        BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 
        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
@@ -658,7 +784,7 @@ static void cpuhp_complete_idle_dead(void *arg)
 {
        struct cpuhp_cpu_state *st = arg;
 
-       complete(&st->done);
+       complete_ap_thread(st, false);
 }
 
 void cpuhp_report_idle_dead(void)
@@ -680,6 +806,7 @@ void cpuhp_report_idle_dead(void)
 #define takedown_cpu           NULL
 #endif
 
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 /* Requires cpu_add_remove_lock to be held */
@@ -699,8 +826,7 @@ static int __ref _cpu_down(unsigned int cpu, int 
tasks_frozen,
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       prev_state = st->state;
-       st->target = target;
+       prev_state = cpuhp_set_state(st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread.
@@ -727,8 +853,7 @@ static int __ref _cpu_down(unsigned int cpu, int 
tasks_frozen,
         */
        ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-               st->target = prev_state;
-               st->rollback = true;
+               cpuhp_reset_state(st, prev_state);
                cpuhp_kick_ap_work(cpu);
        }
 
@@ -776,7 +901,7 @@ void notify_cpu_starting(unsigned int cpu)
        rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
        while (st->state < target) {
                st->state++;
-               cpuhp_invoke_callback(cpu, st->state, true, NULL);
+               cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
        }
 }
 
@@ -794,7 +919,7 @@ void cpuhp_online_idle(enum cpuhp_state state)
                return;
 
        st->state = CPUHP_AP_ONLINE_IDLE;
-       complete(&st->done);
+       complete_ap_thread(st, true);
 }
 
 /* Requires cpu_add_remove_lock to be held */
@@ -829,7 +954,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum 
cpuhp_state target)
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       st->target = target;
+       cpuhp_set_state(st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread once more.
@@ -1296,6 +1421,10 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state 
state, bool bringup,
        struct cpuhp_step *sp = cpuhp_get_step(state);
        int ret;
 
+       /*
+        * If there's nothing to do, we done.
+        * Relies on the union for multi_instance.
+        */
        if ((bringup && !sp->startup.single) ||
            (!bringup && !sp->teardown.single))
                return 0;
@@ -1307,9 +1436,9 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state 
state, bool bringup,
        if (cpuhp_is_ap_state(state))
                ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
        else
-               ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+               ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #else
-       ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+       ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #endif
        BUG_ON(ret && !bringup);
        return ret;

Reply via email to