On Mon, Apr 27, 2026 at 02:38:52PM -0700, Jork Loeser wrote:
> The SynIC is shared between VMBus and MSHV. VMBus owns the message
> page (SIMP), event flags page (SIEFP), global enable (SCONTROL),
> and SINT2. MSHV adds SINT0, SINT5, and the event ring page (SIRBP).
> 
> Currently mshv_synic_cpu_init() redundantly enables SIMP, SIEFP, and
> SCONTROL that VMBus already configured, and mshv_synic_cpu_exit()
> disables all of them. This is wrong because MSHV can be torn down
> while VMBus is still active. In particular, a kexec reboot notifier
> tears down MSHV first. Disabling SCONTROL, SIMP, and SIEFP out
> from under VMBus causes its later cleanup to write SynIC MSRs while
> SynIC is disabled, which the hypervisor does not tolerate.
> 
> Restrict MSHV to managing only the resources it owns:
> - SINT0, SINT5: mask on cleanup, unmask on init
> - SIRBP: enable/disable as before
> - SIMP, SIEFP, SCONTROL: leave to VMBus when it is active (L1VH
>   and nested root partition); on a non-nested root partition VMBus
>   does not run, so MSHV must enable/disable them
> 
> While here, fix the SIEFP and SIRBP memremap() and virt_to_phys()
> calls to use HV_HYP_PAGE_SHIFT/HV_HYP_PAGE_SIZE instead of
> PAGE_SHIFT/PAGE_SIZE. The hypervisor always uses 4K pages for SynIC
> register GPAs regardless of the kernel page size, so using PAGE_SHIFT
> produces wrong addresses on ARM64 with 64K pages.
> 
> Note that initialization order matters - VMBus first, MSHV second,
> and the reverse on de-init. Ideally, we would want a dedicated SYNIC
> driver that replaces the cross-dependencies with a clear API and
> dynamic tracking. Such refactor should go into its own dedicated
> series, outside of this kexec fix series.
> 
> Signed-off-by: Jork Loeser <[email protected]>
> ---
>  drivers/hv/hv.c         |   3 +
>  drivers/hv/mshv_synic.c | 150 ++++++++++++++++++++++++++--------------
>  2 files changed, 103 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index ae60fd542292..ef4b1b03395d 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -272,6 +272,9 @@ void hv_synic_free(void)
>  /*
>   * hv_hyp_synic_enable_regs - Initialize the Synthetic Interrupt Controller
>   * with the hypervisor.
> + *
> + * Note: When MSHV is present, mshv_synic_cpu_init() initializes further
> + * registers later.
>   */
>  void hv_hyp_synic_enable_regs(unsigned int cpu)
>  {
> diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c
> index e2288a726fec..2db3b0192eac 100644
> --- a/drivers/hv/mshv_synic.c
> +++ b/drivers/hv/mshv_synic.c
> @@ -13,6 +13,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/cpuhotplug.h>
> +#include <linux/hyperv.h>
>  #include <linux/reboot.h>
>  #include <asm/mshyperv.h>
>  #include <linux/acpi.h>
> @@ -456,46 +457,75 @@ static int mshv_synic_cpu_init(unsigned int cpu)
>       union hv_synic_siefp siefp;
>       union hv_synic_sirbp sirbp;
>       union hv_synic_sint sint;
> -     union hv_synic_scontrol sctrl;
>       struct hv_synic_pages *spages = this_cpu_ptr(synic_pages);
>       struct hv_message_page **msg_page = &spages->hyp_synic_message_page;
>       struct hv_synic_event_flags_page **event_flags_page =
>                       &spages->synic_event_flags_page;
>       struct hv_synic_event_ring_page **event_ring_page =
>                       &spages->synic_event_ring_page;
> +     /*
> +      * VMBus owns SIMP/SIEFP/SCONTROL when it is active.
> +      * See hv_hyp_synic_enable_regs() for that initialization.
> +      */
> +     bool vmbus_active = hv_vmbus_exists();
>  
> -     /* Setup the Synic's message page */
> +     /*
> +      * Map the SYNIC message page. When VMBus is not active the
> +      * hypervisor pre-provisions the SIMP GPA but may not set
> +      * simp_enabled — enable it here.
> +      */
>       simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
> -     simp.simp_enabled = true;
> +     if (!vmbus_active) {
> +             simp.simp_enabled = true;
> +             hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
> +     }
>       *msg_page = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
>                            HV_HYP_PAGE_SIZE,
>                            MEMREMAP_WB);
>  
>       if (!(*msg_page))
> -             return -EFAULT;
> -
> -     hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
> +             goto cleanup_simp;

It would be cleaner (and simpler to read) if there were another goto
label that only unsets HV_MSR_SIMP, instead of checking *msg_page for
NULL again under the cleanup_simp label.

This applies to all the goto labels in this function.

Reviewed-by: Stanislav Kinsburskii <[email protected]>

>  
> -     /* Setup the Synic's event flags page */
> +     /*
> +      * Map the event flags page. Same as SIMP: enable when
> +      * VMBus is not active, already enabled by VMBus otherwise.
> +      */
>       siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
> -     siefp.siefp_enabled = true;
> -     *event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT,
> -                                  PAGE_SIZE, MEMREMAP_WB);
> +     if (!vmbus_active) {
> +             siefp.siefp_enabled = true;
> +             hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
> +     }
> +     *event_flags_page = memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> +                                  HV_HYP_PAGE_SIZE, MEMREMAP_WB);
>  
>       if (!(*event_flags_page))
> -             goto cleanup;
> -
> -     hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
> +             goto cleanup_siefp;
>  
>       /* Setup the Synic's event ring page */
>       sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
> -     sirbp.sirbp_enabled = true;
> -     *event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT,
> -                                 PAGE_SIZE, MEMREMAP_WB);
>  
> -     if (!(*event_ring_page))
> -             goto cleanup;
> +     if (hv_root_partition()) {
> +             *event_ring_page = memremap(sirbp.base_sirbp_gpa << HV_HYP_PAGE_SHIFT,
> +                                         HV_HYP_PAGE_SIZE, MEMREMAP_WB);
>  
> +             if (!(*event_ring_page))
> +                     goto cleanup_siefp;
> +     } else {
> +             /*
> +              * On L1VH the hypervisor does not provide a SIRBP page.
> +              * Allocate one and program its GPA into the MSR.
> +              */
> +             *event_ring_page = (struct hv_synic_event_ring_page *)
> +                     get_zeroed_page(GFP_KERNEL);
> +
> +             if (!(*event_ring_page))
> +                     goto cleanup_siefp;
> +
> +             sirbp.base_sirbp_gpa = virt_to_phys(*event_ring_page)
> +                             >> HV_HYP_PAGE_SHIFT;
> +     }
> +
> +     sirbp.sirbp_enabled = true;
>       hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
>  
>       if (mshv_sint_irq != -1)
> @@ -518,28 +548,30 @@ static int mshv_synic_cpu_init(unsigned int cpu)
>       hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX,
>                             sint.as_uint64);
>  
> -     /* Enable global synic bit */
> -     sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
> -     sctrl.enable = 1;
> -     hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
> +     /* When VMBus is active it already enabled SCONTROL. */
> +     if (!vmbus_active) {
> +             union hv_synic_scontrol sctrl;
> +
> +             sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
> +             sctrl.enable = 1;
> +             hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
> +     }
>  
>       return 0;
>  
> -cleanup:
> -     if (*event_ring_page) {
> -             sirbp.sirbp_enabled = false;
> -             hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
> -             memunmap(*event_ring_page);
> -     }
> -     if (*event_flags_page) {
> +cleanup_siefp:
> +     if (*event_flags_page)
> +             memunmap(*event_flags_page);
> +     if (!vmbus_active) {
>               siefp.siefp_enabled = false;
>               hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
> -             memunmap(*event_flags_page);
>       }
> -     if (*msg_page) {
> +cleanup_simp:
> +     if (*msg_page)
> +             memunmap(*msg_page);
> +     if (!vmbus_active) {
>               simp.simp_enabled = false;
>               hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
> -             memunmap(*msg_page);
>       }
>  
>       return -EFAULT;
> @@ -548,16 +580,15 @@ static int mshv_synic_cpu_init(unsigned int cpu)
>  static int mshv_synic_cpu_exit(unsigned int cpu)
>  {
>       union hv_synic_sint sint;
> -     union hv_synic_simp simp;
> -     union hv_synic_siefp siefp;
>       union hv_synic_sirbp sirbp;
> -     union hv_synic_scontrol sctrl;
>       struct hv_synic_pages *spages = this_cpu_ptr(synic_pages);
>       struct hv_message_page **msg_page = &spages->hyp_synic_message_page;
>       struct hv_synic_event_flags_page **event_flags_page =
>               &spages->synic_event_flags_page;
>       struct hv_synic_event_ring_page **event_ring_page =
>               &spages->synic_event_ring_page;
> +     /* VMBus owns SIMP/SIEFP/SCONTROL when it is active */
> +     bool vmbus_active = hv_vmbus_exists();
>  
>       /* Disable the interrupt */
>       sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX);
> @@ -574,28 +605,47 @@ static int mshv_synic_cpu_exit(unsigned int cpu)
>       if (mshv_sint_irq != -1)
>               disable_percpu_irq(mshv_sint_irq);
>  
> -     /* Disable Synic's event ring page */
> +     /* Disable SYNIC event ring page owned by MSHV */
>       sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP);
>       sirbp.sirbp_enabled = false;
> -     hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
> -     memunmap(*event_ring_page);
>  
> -     /* Disable Synic's event flags page */
> -     siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
> -     siefp.siefp_enabled = false;
> -     hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
> +     if (hv_root_partition()) {
> +             hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
> +             memunmap(*event_ring_page);
> +     } else {
> +             sirbp.base_sirbp_gpa = 0;
> +             hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64);
> +             free_page((unsigned long)*event_ring_page);
> +     }
> +
> +     /*
> +      * Release our mappings of the message and event flags pages.
> +      * When VMBus is not active, we enabled SIMP/SIEFP — disable
> +      * them. Otherwise VMBus owns the MSRs — leave them.
> +      */
>       memunmap(*event_flags_page);
> +     if (!vmbus_active) {
> +             union hv_synic_simp simp;
> +             union hv_synic_siefp siefp;
>  
> -     /* Disable Synic's message page */
> -     simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
> -     simp.simp_enabled = false;
> -     hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
> +             siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP);
> +             siefp.siefp_enabled = false;
> +             hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64);
> +
> +             simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP);
> +             simp.simp_enabled = false;
> +             hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64);
> +     }
>       memunmap(*msg_page);
>  
> -     /* Disable global synic bit */
> -     sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
> -     sctrl.enable = 0;
> -     hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
> +     /* When VMBus is active it owns SCONTROL — leave it. */
> +     if (!vmbus_active) {
> +             union hv_synic_scontrol sctrl;
> +
> +             sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL);
> +             sctrl.enable = 0;
> +             hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64);
> +     }
>  
>       return 0;
>  }
> -- 
> 2.43.0
> 

Reply via email to