From: Peter Zijlstra <pet...@infradead.org> Sent: Wednesday, April 30, 2025 
4:08 AM
> 
> What used to be a simple few instructions has turned into a giant mess
> (for x86_64). Not only does it use static_branch wrong, it mixes it
> with dynamic branches for no apparent reason.
> 
> Notably it uses static_branch through an out-of-line function call,
> which completely defeats the purpose, since instead of a simple
> JMP/NOP site, you get a CALL+RET+TEST+Jcc sequence in return, which is
> absolutely idiotic.
> 
> Add to that a dynamic test of hyperv_paravisor_present, something
> which is set once and never changed.
> 
> Replace all this idiocy with a single direct function call to the
> right hypercall variant.
> 
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>

I've done these tests on Hyper-V VMs with this patch series. My focus
is the Hyper-V changes in Patches 11 and 12, not the other changes.

* Normal VM boot and basic smoke test
* TDX and SEV-SNP VMs boot and basic smoke test. These VMs have
   a paravisor
* Normal VM taking a panic and running the kdump kernel
* Normal VM suspending for hibernation, then resuming from
   hibernation
* Verified that IBT is enabled in a normal VM. It's not offered in a TDX
   VM on Hyper-V when a paravisor is used. I don't know about the case
   without a paravisor.
* Building a 64-bit kernel with and without CONFIG_AMD_MEM_ENCRYPT
   and CONFIG_INTEL_TDX_GUEST.
* Building a 32-bit kernel (but I did not try to run it)

TDX and SEV-SNP VMs without a paravisor are not tested, so updating
the static call, and the new direct call path, has not been tested for
TDX and SNP hypercalls. I don't have a hardware environment where I
can test without a paravisor.

Tested-by: Michael Kelley <mhkli...@outlook.com>
Reviewed-by: Michael Kelley <mhkli...@outlook.com>

> ---
>  arch/x86/hyperv/hv_init.c       |   20 +++++
>  arch/x86/hyperv/ivm.c           |   15 ++++
>  arch/x86/include/asm/mshyperv.h |  137 
> +++++++++++-----------------------------
>  arch/x86/kernel/cpu/mshyperv.c  |   19 +++--
>  4 files changed, 89 insertions(+), 102 deletions(-)
> 
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -35,7 +35,27 @@
>  #include <linux/highmem.h>
> 
>  void *hv_hypercall_pg;
> +
> +#ifdef CONFIG_X86_64
> +u64 hv_std_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +     u64 hv_status;
> +
> +     if (!hv_hypercall_pg)
> +             return U64_MAX;
> +
> +     register u64 __r8 asm("r8") = param2;
> +     asm volatile (CALL_NOSPEC
> +                   : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +                     "+c" (control), "+d" (param1), "+r" (__r8)
> +                   : THUNK_TARGET(hv_hypercall_pg)
> +                   : "cc", "memory", "r9", "r10", "r11");
> +
> +     return hv_status;
> +}
> +#else
>  EXPORT_SYMBOL_GPL(hv_hypercall_pg);
> +#endif
> 
>  union hv_ghcb * __percpu *hv_ghcb_pg;
> 
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -376,9 +376,23 @@ int hv_snp_boot_ap(u32 cpu, unsigned lon
>       return ret;
>  }
> 
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
> +{
> +     u64 hv_status;
> +
> +     register u64 __r8 asm("r8") = param2;
> +     asm volatile("vmmcall"
> +                  : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +                    "+c" (control), "+d" (param1), "+r" (__r8)
> +                  : : "cc", "memory", "r9", "r10", "r11");
> +
> +     return hv_status;
> +}
> +
>  #else
>  static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
>  static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
> +u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
>  #endif /* CONFIG_AMD_MEM_ENCRYPT */
> 
>  #ifdef CONFIG_INTEL_TDX_GUEST
> @@ -428,6 +442,7 @@ u64 hv_tdx_hypercall(u64 control, u64 pa
>  #else
>  static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
>  static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
> +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
>  #endif /* CONFIG_INTEL_TDX_GUEST */
> 
>  #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -6,6 +6,7 @@
>  #include <linux/nmi.h>
>  #include <linux/msi.h>
>  #include <linux/io.h>
> +#include <linux/static_call.h>
>  #include <asm/nospec-branch.h>
>  #include <asm/paravirt.h>
>  #include <hyperv/hvhdk.h>
> @@ -38,16 +39,21 @@ static inline unsigned char hv_get_nmi_r
>       return 0;
>  }
> 
> -#if IS_ENABLED(CONFIG_HYPERV)
> -extern bool hyperv_paravisor_present;
> +extern u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2);
> +extern u64 hv_std_hypercall(u64 control, u64 param1, u64 param2);
> 
> +#if IS_ENABLED(CONFIG_HYPERV)
>  extern void *hv_hypercall_pg;
> 
>  extern union hv_ghcb * __percpu *hv_ghcb_pg;
> 
>  bool hv_isolation_type_snp(void);
>  bool hv_isolation_type_tdx(void);
> -u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
> +
> +#ifdef CONFIG_X86_64
> +DECLARE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
> +#endif
> 
>  /*
>   * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA
> @@ -64,37 +70,15 @@ static inline u64 hv_do_hypercall(u64 co
>  {
>       u64 input_address = input ? virt_to_phys(input) : 0;
>       u64 output_address = output ? virt_to_phys(output) : 0;
> -     u64 hv_status;
> 
>  #ifdef CONFIG_X86_64
> -     if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> -             return hv_tdx_hypercall(control, input_address, output_address);
> -
> -     if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> -             __asm__ __volatile__("mov %[output_address], %%r8\n"
> -                                  "vmmcall"
> -                                  : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -                                    "+c" (control), "+d" (input_address)
> -                                  : [output_address] "r" (output_address)
> -                                  : "cc", "memory", "r8", "r9", "r10", 
> "r11");
> -             return hv_status;
> -     }
> -
> -     if (!hv_hypercall_pg)
> -             return U64_MAX;
> -
> -     __asm__ __volatile__("mov %[output_address], %%r8\n"
> -                          CALL_NOSPEC
> -                          : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -                            "+c" (control), "+d" (input_address)
> -                          : [output_address] "r" (output_address),
> -                            THUNK_TARGET(hv_hypercall_pg)
> -                          : "cc", "memory", "r8", "r9", "r10", "r11");
> +     return static_call_mod(hv_hypercall)(control, input_address, 
> output_address);
>  #else
>       u32 input_address_hi = upper_32_bits(input_address);
>       u32 input_address_lo = lower_32_bits(input_address);
>       u32 output_address_hi = upper_32_bits(output_address);
>       u32 output_address_lo = lower_32_bits(output_address);
> +     u64 hv_status;
> 
>       if (!hv_hypercall_pg)
>               return U64_MAX;
> @@ -107,8 +91,8 @@ static inline u64 hv_do_hypercall(u64 co
>                              "D"(output_address_hi), "S"(output_address_lo),
>                              THUNK_TARGET(hv_hypercall_pg)
>                            : "cc", "memory");
> -#endif /* !x86_64 */
>       return hv_status;
> +#endif /* !x86_64 */
>  }
> 
>  /* Hypercall to the L0 hypervisor */
> @@ -120,41 +104,23 @@ static inline u64 hv_do_nested_hypercall
>  /* Fast hypercall with 8 bytes of input and no output */
>  static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
>  {
> -     u64 hv_status;
> -
>  #ifdef CONFIG_X86_64
> -     if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> -             return hv_tdx_hypercall(control, input1, 0);
> -
> -     if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> -             __asm__ __volatile__(
> -                             "vmmcall"
> -                             : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -                             "+c" (control), "+d" (input1)
> -                             :: "cc", "r8", "r9", "r10", "r11");
> -     } else {
> -             __asm__ __volatile__(CALL_NOSPEC
> -                                  : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -                                    "+c" (control), "+d" (input1)
> -                                  : THUNK_TARGET(hv_hypercall_pg)
> -                                  : "cc", "r8", "r9", "r10", "r11");
> -     }
> +     return static_call_mod(hv_hypercall)(control, input1, 0);
>  #else
> -     {
> -             u32 input1_hi = upper_32_bits(input1);
> -             u32 input1_lo = lower_32_bits(input1);
> -
> -             __asm__ __volatile__ (CALL_NOSPEC
> -                                   : "=A"(hv_status),
> -                                     "+c"(input1_lo),
> -                                     ASM_CALL_CONSTRAINT
> -                                   : "A" (control),
> -                                     "b" (input1_hi),
> -                                     THUNK_TARGET(hv_hypercall_pg)
> -                                   : "cc", "edi", "esi");
> -     }
> -#endif
> +     u32 input1_hi = upper_32_bits(input1);
> +     u32 input1_lo = lower_32_bits(input1);
> +     u64 hv_status;
> +
> +     __asm__ __volatile__ (CALL_NOSPEC
> +                           : "=A"(hv_status),
> +                           "+c"(input1_lo),
> +                           ASM_CALL_CONSTRAINT
> +                           : "A" (control),
> +                           "b" (input1_hi),
> +                           THUNK_TARGET(hv_hypercall_pg)
> +                           : "cc", "edi", "esi");
>       return hv_status;
> +#endif
>  }
> 
>  static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> @@ -174,45 +140,24 @@ static inline u64 hv_do_fast_nested_hype
>  /* Fast hypercall with 16 bytes of input */
>  static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 
> input2)
>  {
> -     u64 hv_status;
> -
>  #ifdef CONFIG_X86_64
> -     if (hv_isolation_type_tdx() && !hyperv_paravisor_present)
> -             return hv_tdx_hypercall(control, input1, input2);
> -
> -     if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
> -             __asm__ __volatile__("mov %[input2], %%r8\n"
> -                                  "vmmcall"
> -                                  : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -                                    "+c" (control), "+d" (input1)
> -                                  : [input2] "r" (input2)
> -                                  : "cc", "r8", "r9", "r10", "r11");
> -     } else {
> -             __asm__ __volatile__("mov %[input2], %%r8\n"
> -                                  CALL_NOSPEC
> -                                  : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -                                    "+c" (control), "+d" (input1)
> -                                  : [input2] "r" (input2),
> -                                    THUNK_TARGET(hv_hypercall_pg)
> -                                  : "cc", "r8", "r9", "r10", "r11");
> -     }
> +     return static_call_mod(hv_hypercall)(control, input1, input2);
>  #else
> -     {
> -             u32 input1_hi = upper_32_bits(input1);
> -             u32 input1_lo = lower_32_bits(input1);
> -             u32 input2_hi = upper_32_bits(input2);
> -             u32 input2_lo = lower_32_bits(input2);
> -
> -             __asm__ __volatile__ (CALL_NOSPEC
> -                                   : "=A"(hv_status),
> -                                     "+c"(input1_lo), ASM_CALL_CONSTRAINT
> -                                   : "A" (control), "b" (input1_hi),
> -                                     "D"(input2_hi), "S"(input2_lo),
> -                                     THUNK_TARGET(hv_hypercall_pg)
> -                                   : "cc");
> -     }
> -#endif
> +     u32 input1_hi = upper_32_bits(input1);
> +     u32 input1_lo = lower_32_bits(input1);
> +     u32 input2_hi = upper_32_bits(input2);
> +     u32 input2_lo = lower_32_bits(input2);
> +     u64 hv_status;
> +
> +     __asm__ __volatile__ (CALL_NOSPEC
> +                           : "=A"(hv_status),
> +                           "+c"(input1_lo), ASM_CALL_CONSTRAINT
> +                           : "A" (control), "b" (input1_hi),
> +                           "D"(input2_hi), "S"(input2_lo),
> +                           THUNK_TARGET(hv_hypercall_pg)
> +                           : "cc");
>       return hv_status;
> +#endif
>  }
> 
>  static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,10 +37,6 @@
>  bool hv_nested;
>  struct ms_hyperv_info ms_hyperv;
> 
> -/* Used in modules via hv_do_hypercall(): see 
> arch/x86/include/asm/mshyperv.h */
> -bool hyperv_paravisor_present __ro_after_init;
> -EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
> -
>  #if IS_ENABLED(CONFIG_HYPERV)
>  static inline unsigned int hv_get_nested_msr(unsigned int reg)
>  {
> @@ -287,8 +283,18 @@ static void __init x86_setup_ops_for_tsc
>       old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
>       x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
>  }
> +
> +#ifdef CONFIG_X86_64
> +DEFINE_STATIC_CALL(hv_hypercall, hv_std_hypercall);
> +EXPORT_STATIC_CALL_TRAMP_GPL(hv_hypercall);
> +#define hypercall_update(hc) static_call_update(hv_hypercall, hc)
> +#endif
>  #endif /* CONFIG_HYPERV */
> 
> +#ifndef hypercall_update
> +#define hypercall_update(hc) (void)hc
> +#endif
> +
>  static uint32_t  __init ms_hyperv_platform(void)
>  {
>       u32 eax;
> @@ -483,14 +489,14 @@ static void __init ms_hyperv_init_platfo
>                       ms_hyperv.shared_gpa_boundary =
>                               BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
> 
> -             hyperv_paravisor_present = !!ms_hyperv.paravisor_present;
> -
>               pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 
> 0x%x\n",
>                       ms_hyperv.isolation_config_a, 
> ms_hyperv.isolation_config_b);
> 
> 
>               if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
>                       static_branch_enable(&isolation_type_snp);
> +                     if (!ms_hyperv.paravisor_present)
> +                             hypercall_update(hv_snp_hypercall);
>               } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) {
>                       static_branch_enable(&isolation_type_tdx);
> 
> @@ -498,6 +504,7 @@ static void __init ms_hyperv_init_platfo
>                       ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED;
> 
>                       if (!ms_hyperv.paravisor_present) {
> +                             hypercall_update(hv_tdx_hypercall);
>                               /*
>                                * Mark the Hyper-V TSC page feature as disabled
>                                * in a TDX VM without paravisor so that the
> 
> 


Reply via email to