When a shutdown is initiated in the root partition without sleep states having been configured, the call to `hv_call_enter_sleep_state` fails. In such cases the root partition falls back to legacy ACPI mechanisms to power off. That legacy power-off attempt is intercepted by MSHV and results in a Machine Check Exception (MCE).
Root panics with a trace similar to: [ 81.306348] reboot: Power down [ 81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001 [ 81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9 [ 81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff [ 81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii' [ 81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt [ 81.314717] Kernel panic - not syncing: Fatal machine check To prevent this, properly configure sleep states within MSHV, allowing the root partition to shut down cleanly without triggering a panic. Signed-off-by: Praveen K Paladugu <[email protected]> Co-developed-by: Anatol Belski <[email protected]> Signed-off-by: Anatol Belski <[email protected]> --- arch/x86/hyperv/hv_init.c | 7 ++ arch/x86/include/asm/mshyperv.h | 1 + drivers/hv/hv_common.c | 119 ++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index afdbda2dd7b7..57bd96671ead 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -510,6 +510,13 @@ void __init hyperv_init(void) memunmap(src); hv_remap_tsc_clocksource(); + /* + * The notifier registration might fail at various hops. + * Corresponding error messages will land in dmesg. There is + * otherwise nothing that can be specifically done to handle + * failures here. 
+ */ + (void)hv_sleep_notifiers_register(); } else { hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index abc4659f5809..fb8d691193df 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -236,6 +236,7 @@ int hyperv_fill_flush_guest_mapping_list( void hv_apic_init(void); void __init hv_init_spinlocks(void); bool hv_vcpu_is_preempted(int vcpu); +int hv_sleep_notifiers_register(void); #else static inline void hv_apic_init(void) {} #endif diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index e109a620c83f..cfba9ded7bcb 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -837,3 +837,122 @@ const char *hv_result_to_string(u64 status) return "Unknown"; } EXPORT_SYMBOL_GPL(hv_result_to_string); + +#if IS_ENABLED(CONFIG_ACPI) +/* + * Corresponding sleep states have to be initialized in order for a subsequent + * HVCALL_ENTER_SLEEP_STATE call to succeed. Currently only S5 state as per + * ACPI 6.4 chapter 7.4.2 is relevant, while S1, S2 and S3 can be supported. + * + * ACPI should be initialized and should support S5 sleep state when this method + * is called, so that it can extract correct PM values and pass them to hv. 
+ */ +static int hv_initialize_sleep_states(void) +{ + u64 status; + unsigned long flags; + struct hv_input_set_system_property *in; + acpi_status acpi_status; + u8 sleep_type_a, sleep_type_b; + + if (!acpi_sleep_state_supported(ACPI_STATE_S5)) { + pr_err("%s: S5 sleep state not supported.\n", __func__); + return -ENODEV; + } + + acpi_status = acpi_get_sleep_type_data(ACPI_STATE_S5, + &sleep_type_a, &sleep_type_b); + if (ACPI_FAILURE(acpi_status)) + return -ENODEV; + + local_irq_save(flags); + in = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(in, 0, sizeof(*in)); + + in->property_id = HV_SYSTEM_PROPERTY_SLEEP_STATE; + in->set_sleep_state_info.sleep_state = HV_SLEEP_STATE_S5; + in->set_sleep_state_info.pm1a_slp_typ = sleep_type_a; + in->set_sleep_state_info.pm1b_slp_typ = sleep_type_b; + + status = hv_do_hypercall(HVCALL_SET_SYSTEM_PROPERTY, in, NULL); + local_irq_restore(flags); + + if (!hv_result_success(status)) { + hv_status_err(status, "\n"); + return hv_result_to_errno(status); + } + + return 0; +} + +static int hv_call_enter_sleep_state(u32 sleep_state) +{ + u64 status; + int ret; + unsigned long flags; + struct hv_input_enter_sleep_state *in; + + ret = hv_initialize_sleep_states(); + if (ret) + return ret; + + local_irq_save(flags); + in = *this_cpu_ptr(hyperv_pcpu_input_arg); + in->sleep_state = sleep_state; + + status = hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL); + local_irq_restore(flags); + + if (!hv_result_success(status)) { + hv_status_err(status, "\n"); + return hv_result_to_errno(status); + } + + return 0; +} + +static int hv_reboot_notifier_handler(struct notifier_block *this, + unsigned long code, void *another) +{ + int ret = 0; + + if (code == SYS_HALT || code == SYS_POWER_OFF) + ret = hv_call_enter_sleep_state(HV_SLEEP_STATE_S5); + + return ret ? 
NOTIFY_DONE : NOTIFY_OK; +} + +static struct notifier_block hv_reboot_notifier = { + .notifier_call = hv_reboot_notifier_handler, +}; + +static int hv_acpi_sleep_handler(u8 sleep_state, u32 pm1a_cnt, u32 pm1b_cnt) +{ + int ret = 0; + + if (sleep_state == ACPI_STATE_S5) + ret = hv_call_enter_sleep_state(HV_SLEEP_STATE_S5); + + return ret == 0 ? 1 : -1; +} + +static int hv_acpi_extended_sleep_handler(u8 sleep_state, u32 val_a, u32 val_b) +{ + return hv_acpi_sleep_handler(sleep_state, val_a, val_b); +} + +int hv_sleep_notifiers_register(void) +{ + int ret; + + acpi_os_set_prepare_sleep(&hv_acpi_sleep_handler); + acpi_os_set_prepare_extended_sleep(&hv_acpi_extended_sleep_handler); + + ret = register_reboot_notifier(&hv_reboot_notifier); + if (ret) + pr_err("%s: cannot register reboot notifier %d\n", + __func__, ret); + + return ret; +} +#endif -- 2.51.0
