This callback is still partly asymmetrical since the counterpart of mce_device_create() is done in CPU_DEAD.
On failure we don't undo the work done by mce_device_create(), _but_ that will happen once we move CPU_DEAD to the state machine. Cc: Tony Luck <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: [email protected] Cc: [email protected] Signed-off-by: Sebastian Andrzej Siewior <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> --- arch/x86/kernel/cpu/mcheck/mce.c | 61 ++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 596a7128a46b..b1770ebcb8de 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2487,18 +2487,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - mce_device_create(cpu); - if (threshold_cpu_callback_online) { - int ret; - - ret = threshold_cpu_callback_online(cpu); - if (ret) { - mce_device_remove(cpu); - return NOTIFY_BAD; - } - } - break; case CPU_DEAD: if (threshold_cpu_callback_dead) threshold_cpu_callback_dead(cpu); @@ -2514,6 +2502,22 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } +static int mce_cpu_online(unsigned int cpu) +{ + int ret; + + mce_device_create(cpu); + if (!threshold_cpu_callback_online) + return 0; + + ret = threshold_cpu_callback_online(cpu); + if (ret) { + mce_device_remove(cpu); + return ret; + } + return 0; +} + static int mce_cpu_down_dying(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); @@ -2547,8 +2551,8 @@ static __init void mce_init_banks(void) static __init int mcheck_init_device(void) { + enum cpuhp_state hp_online; int err; - int i = 0; if (!mce_available(&boot_cpu_data)) { err = -EIO; @@ -2580,22 +2584,13 @@ static __init int mcheck_init_device(void) mcheck_cpu_starting, mce_cpu_down_dying); if (err) goto err_init_pool; + err = 
cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online", + mce_cpu_online, NULL); + if (err < 0) + goto err_hp_online; + hp_online = err; cpu_notifier_register_begin(); - for_each_online_cpu(i) { - err = mce_device_create(i); - if (err) { - /* - * Register notifier anyway (and do not unreg it) so - * that we don't leave undeleted timers, see notifier - * callback above. - */ - __register_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); - goto err_device_create; - } - } - __register_hotcpu_notifier(&mce_cpu_notifier); cpu_notifier_register_done(); @@ -2610,17 +2605,9 @@ static __init int mcheck_init_device(void) err_register: unregister_syscore_ops(&mce_syscore_ops); + cpuhp_remove_state(hp_online); -err_device_create: - /* - * We didn't keep track of which devices were created above, but - * even if we had, the set of online cpus might have changed. - * Play safe and remove for every possible cpu, since - * mce_device_remove() will do the right thing. - */ - for_each_possible_cpu(i) - mce_device_remove(i); - +err_hp_online: cpuhp_remove_state(CPUHP_AP_X86_MCE_STARTING); err_init_pool: -- 2.10.2

