On Wed, Apr 14, 2021 at 10:49:37AM -0400, Tianyu Lan wrote: > From: Tianyu Lan <tianyu....@microsoft.com> > > Hyper-V provides GHCB protocol to write Synthetic Interrupt > Controller MSR registers and these registers are emulated by > Hypervisor rather than paravisor.
What is paravisor? Is that the VMPL0 to borrow AMD speak from https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf > > Hyper-V requests to write SINTx MSR registers twice(once via > GHCB and once via wrmsr instruction including the proxy bit 21) Why? And what does 'proxy bit 21' mean? Isn't it just setting a bit on the MSR? Oh. Are you writting to it twice because you need to let the hypervisor know (Hyper-V) which is done via the WRMSR. And then the inner hypervisor (VMPL0) via the SINITx MSR? > Guest OS ID MSR also needs to be set via GHCB. > > Signed-off-by: Tianyu Lan <tianyu....@microsoft.com> > --- > arch/x86/hyperv/hv_init.c | 18 +---- > arch/x86/hyperv/ivm.c | 130 ++++++++++++++++++++++++++++++++ > arch/x86/include/asm/mshyperv.h | 87 +++++++++++++++++---- > arch/x86/kernel/cpu/mshyperv.c | 3 + > drivers/hv/hv.c | 65 +++++++++++----- > include/asm-generic/mshyperv.h | 4 +- > 6 files changed, 261 insertions(+), 46 deletions(-) > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > index 90e65fbf4c58..87b1dd9c84d6 100644 > --- a/arch/x86/hyperv/hv_init.c > +++ b/arch/x86/hyperv/hv_init.c > @@ -475,6 +475,9 @@ void __init hyperv_init(void) > > ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); > *ghcb_base = ghcb_va; > + > + /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ > + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); > } > > rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); > @@ -561,6 +564,7 @@ void hyperv_cleanup(void) > > /* Reset our OS id */ > wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); > + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); > > /* > * Reset hypercall page reference before reset the page, > @@ -668,17 +672,3 @@ bool hv_is_hibernation_supported(void) > return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); > } > EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); > - > -enum hv_isolation_type hv_get_isolation_type(void) > -{ > - if (!(ms_hyperv.features_b & HV_ISOLATION)) > - return HV_ISOLATION_TYPE_NONE; > - return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); > -} > -EXPORT_SYMBOL_GPL(hv_get_isolation_type); > - > -bool hv_is_isolation_supported(void) > -{ > - return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; > -} > -EXPORT_SYMBOL_GPL(hv_is_isolation_supported); > diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c > index a5950b7a9214..2ec64b367aaf 100644 > --- a/arch/x86/hyperv/ivm.c > +++ b/arch/x86/hyperv/ivm.c > @@ -6,12 +6,139 @@ > * Tianyu Lan <tianyu....@microsoft.com> > */ > > +#include <linux/types.h> > +#include <linux/bitfield.h> > #include <linux/hyperv.h> > #include <linux/types.h> > #include <linux/bitfield.h> > #include <asm/io.h> > +#include <asm/svm.h> > +#include <asm/sev-es.h> > #include <asm/mshyperv.h> > > +union hv_ghcb { > + struct ghcb ghcb; > +} __packed __aligned(PAGE_SIZE); > + > +void hv_ghcb_msr_write(u64 msr, u64 value) > +{ > + union hv_ghcb *hv_ghcb; > + void **ghcb_base; > + unsigned long flags; > + > + if (!ms_hyperv.ghcb_base) > + return; > + > + local_irq_save(flags); > + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); > + hv_ghcb = (union hv_ghcb *)*ghcb_base; > + if (!hv_ghcb) { > + local_irq_restore(flags); > + return; > + } > + > + memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE); > + > + hv_ghcb->ghcb.protocol_version = 1; > + hv_ghcb->ghcb.ghcb_usage = 0; > + > + ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR); > + ghcb_set_rcx(&hv_ghcb->ghcb, msr); > + ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value)); > + ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32); > + ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1); > + ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0); > + > + VMGEXIT(); > + > + if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1) > + pr_warn("Fail to write msr via ghcb.\n."); > + > + local_irq_restore(flags); > +} > +EXPORT_SYMBOL_GPL(hv_ghcb_msr_write); > + > +void hv_ghcb_msr_read(u64 msr, u64 *value) > +{ > + union hv_ghcb *hv_ghcb; > + void **ghcb_base; > + unsigned long flags; > + > + if (!ms_hyperv.ghcb_base) > + return; > + > + local_irq_save(flags); > + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base); > + hv_ghcb = (union hv_ghcb *)*ghcb_base; > + if (!hv_ghcb) { > + local_irq_restore(flags); > + return; > + } > + > + memset(hv_ghcb, 0x00, PAGE_SIZE); > + hv_ghcb->ghcb.protocol_version = 1; > + hv_ghcb->ghcb.ghcb_usage = 0; > + > + ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR); > + ghcb_set_rcx(&hv_ghcb->ghcb, msr); > + ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 0); > + ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0); > + > + VMGEXIT(); > + > + if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1) > + pr_warn("Fail to write msr via ghcb.\n."); > + else > + *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax) > + | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32); > + local_irq_restore(flags); > +} > +EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); > + > +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value) > +{ > + hv_ghcb_msr_read(msr, value); > +} > +EXPORT_SYMBOL_GPL(hv_sint_rdmsrl_ghcb); > + > +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value) > +{ > + hv_ghcb_msr_write(msr, value); > + > + /* Write proxy bit vua wrmsrl instruction. */ > + if (msr >= HV_X64_MSR_SINT0 && msr <= HV_X64_MSR_SINT15) > + wrmsrl(msr, value | 1 << 20); > +} > +EXPORT_SYMBOL_GPL(hv_sint_wrmsrl_ghcb); > + > +inline void hv_signal_eom_ghcb(void) > +{ > + hv_sint_wrmsrl_ghcb(HV_X64_MSR_EOM, 0); > +} > +EXPORT_SYMBOL_GPL(hv_signal_eom_ghcb); > + > +enum hv_isolation_type hv_get_isolation_type(void) > +{ > + if (!(ms_hyperv.features_b & HV_ISOLATION)) > + return HV_ISOLATION_TYPE_NONE; > + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); > +} > +EXPORT_SYMBOL_GPL(hv_get_isolation_type); > + > +bool hv_is_isolation_supported(void) > +{ > + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; > +} > +EXPORT_SYMBOL_GPL(hv_is_isolation_supported); > + > +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); > + > +bool hv_isolation_type_snp(void) > +{ > + return static_branch_unlikely(&isolation_type_snp); > +} > +EXPORT_SYMBOL_GPL(hv_isolation_type_snp); > + > /* > * hv_set_mem_host_visibility - Set host visibility for specified memory. > */ > @@ -22,6 +149,9 @@ int hv_set_mem_host_visibility(void *kbuffer, u32 size, > u32 visibility) > u64 *pfn_array; > int ret = 0; > > + if (!hv_is_isolation_supported()) > + return 0; > + > pfn_array = vzalloc(HV_HYP_PAGE_SIZE); > if (!pfn_array) > return -ENOMEM; > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index d9437f096ce5..73501dbbc240 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -10,6 +10,8 @@ > #include <asm/nospec-branch.h> > #include <asm/paravirt.h> > > +DECLARE_STATIC_KEY_FALSE(isolation_type_snp); > + > typedef int (*hyperv_fill_flush_list_func)( > struct hv_guest_mapping_flush_list *flush, > void *data); > @@ -19,24 +21,64 @@ typedef int (*hyperv_fill_flush_list_func)( > #define hv_init_timer_config(timer, val) \ > wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val) > > -#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) > -#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) > +#define hv_get_sint_reg(val, reg) { \ > + if (hv_isolation_type_snp()) \ > + hv_get_##reg##_ghcb(&val); \ > + else \ > + rdmsrl(HV_X64_MSR_##reg, val); \ > + } > + > +#define hv_set_sint_reg(val, reg) { \ > + if (hv_isolation_type_snp()) \ > + hv_set_##reg##_ghcb(val); \ > + else \ > + wrmsrl(HV_X64_MSR_##reg, val); \ > + } > + > > -#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) > -#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) > +#define hv_get_simp(val) hv_get_sint_reg(val, SIMP) > +#define hv_get_siefp(val) hv_get_sint_reg(val, SIEFP) > > -#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) > -#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) > +#define hv_set_simp(val) hv_set_sint_reg(val, SIMP) > +#define hv_set_siefp(val) hv_set_sint_reg(val, SIEFP) > + > +#define hv_get_synic_state(val) { \ > + if (hv_isolation_type_snp()) \ > + hv_get_synic_state_ghcb(&val); \ > + else \ > + rdmsrl(HV_X64_MSR_SCONTROL, val); \ > + } > +#define hv_set_synic_state(val) { \ > + if (hv_isolation_type_snp()) \ > + hv_set_synic_state_ghcb(val); \ > + else \ > + wrmsrl(HV_X64_MSR_SCONTROL, val); \ > + } > > #define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) > > -#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0) > +#define hv_signal_eom() { \ > + if (hv_isolation_type_snp() && \ > + old_msg_type != HVMSG_TIMER_EXPIRED) \ > + hv_signal_eom_ghcb(); \ > + else \ > + wrmsrl(HV_X64_MSR_EOM, 0); \ > + } > > -#define hv_get_synint_state(int_num, val) \ > - rdmsrl(HV_X64_MSR_SINT0 + int_num, val) > -#define hv_set_synint_state(int_num, val) \ > - wrmsrl(HV_X64_MSR_SINT0 + int_num, val) > -#define hv_recommend_using_aeoi() \ > +#define hv_get_synint_state(int_num, val) { \ > + if (hv_isolation_type_snp()) \ > + hv_get_synint_state_ghcb(int_num, &val);\ > + else \ > + rdmsrl(HV_X64_MSR_SINT0 + int_num, val);\ > + } > +#define hv_set_synint_state(int_num, val) { \ > + if (hv_isolation_type_snp()) \ > + hv_set_synint_state_ghcb(int_num, val); \ > + else \ > + wrmsrl(HV_X64_MSR_SINT0 + int_num, val);\ > + } > + > +#define hv_recommend_using_aeoi() \ > (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)) > > #define hv_get_crash_ctl(val) \ > @@ -271,6 +313,25 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct > hv_interrupt_entry *entry); > > int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility); > int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility); > +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value); > +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value); > +void hv_signal_eom_ghcb(void); > +void hv_ghcb_msr_write(u64 msr, u64 value); > +void hv_ghcb_msr_read(u64 msr, u64 *value); > + > +#define hv_get_synint_state_ghcb(int_num, val) \ > + hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val) > +#define hv_set_synint_state_ghcb(int_num, val) \ > + hv_sint_wrmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val) > + > +#define hv_get_SIMP_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIMP, val) > +#define hv_set_SIMP_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIMP, val) > + > +#define hv_get_SIEFP_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIEFP, val) > +#define hv_set_SIEFP_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIEFP, val) > + > +#define hv_get_synic_state_ghcb(val) > hv_sint_rdmsrl_ghcb(HV_X64_MSR_SCONTROL, val) > +#define hv_set_synic_state_ghcb(val) > hv_sint_wrmsrl_ghcb(HV_X64_MSR_SCONTROL, val) > #else /* CONFIG_HYPERV */ > static inline void hyperv_init(void) {} > static inline void hyperv_setup_mmu_ops(void) {} > @@ -289,9 +350,9 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, > { > return -1; > } > +static inline void hv_signal_eom_ghcb(void) { }; > #endif /* CONFIG_HYPERV */ > > - > #include <asm-generic/mshyperv.h> > > #endif > diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c > index aeafd4017c89..6eaa0891c0f7 100644 > --- a/arch/x86/kernel/cpu/mshyperv.c > +++ b/arch/x86/kernel/cpu/mshyperv.c > @@ -333,6 +333,9 @@ static void __init ms_hyperv_init_platform(void) > > pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B > 0x%x\n", > ms_hyperv.isolation_config_a, > ms_hyperv.isolation_config_b); > + > + if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) > + static_branch_enable(&isolation_type_snp); > } > > if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { > diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c > index f202ac7f4b3d..069530eeb7c6 100644 > --- a/drivers/hv/hv.c > +++ b/drivers/hv/hv.c > @@ -99,17 +99,24 @@ int hv_synic_alloc(void) > tasklet_init(&hv_cpu->msg_dpc, > vmbus_on_msg_dpc, (unsigned long) hv_cpu); > > - hv_cpu->synic_message_page = > - (void *)get_zeroed_page(GFP_ATOMIC); > - if (hv_cpu->synic_message_page == NULL) { > - pr_err("Unable to allocate SYNIC message page\n"); > - goto err; > - } > + /* > + * Synic message and event pages are allocated by paravisor. > + * Skip these pages allocation here. > + */ > + if (!hv_isolation_type_snp()) { > + hv_cpu->synic_message_page = > + (void *)get_zeroed_page(GFP_ATOMIC); > + if (hv_cpu->synic_message_page == NULL) { > + pr_err("Unable to allocate SYNIC message > page\n"); > + goto err; > + } > > - hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC); > - if (hv_cpu->synic_event_page == NULL) { > - pr_err("Unable to allocate SYNIC event page\n"); > - goto err; > + hv_cpu->synic_event_page = > + (void *)get_zeroed_page(GFP_ATOMIC); > + if (hv_cpu->synic_event_page == NULL) { > + pr_err("Unable to allocate SYNIC event page\n"); > + goto err; > + } > } > > hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); > @@ -136,10 +143,17 @@ void hv_synic_free(void) > for_each_present_cpu(cpu) { > struct hv_per_cpu_context *hv_cpu > = per_cpu_ptr(hv_context.cpu_context, cpu); > + free_page((unsigned long)hv_cpu->post_msg_page); > + > + /* > + * Synic message and event pages are allocated by paravisor. > + * Skip free these pages here. > + */ > + if (hv_isolation_type_snp()) > + continue; > > free_page((unsigned long)hv_cpu->synic_event_page); > free_page((unsigned long)hv_cpu->synic_message_page); > - free_page((unsigned long)hv_cpu->post_msg_page); > } > > kfree(hv_context.hv_numa_map); > @@ -161,20 +175,36 @@ void hv_synic_enable_regs(unsigned int cpu) > union hv_synic_sint shared_sint; > union hv_synic_scontrol sctrl; > > - /* Setup the Synic's message page */ > + > + /* Setup the Synic's message. */ > hv_get_simp(simp.as_uint64); > simp.simp_enabled = 1; > - simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) > - >> HV_HYP_PAGE_SHIFT; > + > + if (hv_isolation_type_snp()) { > + hv_cpu->synic_message_page > + = ioremap_cache(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, > + HV_HYP_PAGE_SIZE); > + if (!hv_cpu->synic_message_page) > + pr_err("Fail to map syinc message page.\n"); > + } else { > + simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page) > + >> HV_HYP_PAGE_SHIFT; > + } > > hv_set_simp(simp.as_uint64); > > /* Setup the Synic's event page */ > hv_get_siefp(siefp.as_uint64); > siefp.siefp_enabled = 1; > - siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page) > - >> HV_HYP_PAGE_SHIFT; > - > + if (hv_isolation_type_snp()) { > + hv_cpu->synic_event_page = ioremap_cache( > + siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, > HV_HYP_PAGE_SIZE); > + if (!hv_cpu->synic_event_page) > + pr_err("Fail to map syinc event page.\n"); > + } else { > + siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page) > + >> HV_HYP_PAGE_SHIFT; > + } > hv_set_siefp(siefp.as_uint64); > > /* Setup the shared SINT. */ > @@ -188,7 +218,6 @@ void hv_synic_enable_regs(unsigned int cpu) > /* Enable the global synic bit */ > hv_get_synic_state(sctrl.as_uint64); > sctrl.enable = 1; > - > hv_set_synic_state(sctrl.as_uint64); > } > > diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h > index b73e201abc70..59777377e641 100644 > --- a/include/asm-generic/mshyperv.h > +++ b/include/asm-generic/mshyperv.h > @@ -23,6 +23,7 @@ > #include <linux/bitops.h> > #include <linux/cpumask.h> > #include <asm/ptrace.h> > +#include <asm/mshyperv.h> > #include <asm/hyperv-tlfs.h> > > struct ms_hyperv_info { > @@ -52,7 +53,7 @@ extern struct ms_hyperv_info ms_hyperv; > > extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); > extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); > - > +extern bool hv_isolation_type_snp(void); > > /* Generate the guest OS identifier as described in the Hyper-V TLFS */ > static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, > @@ -186,6 +187,7 @@ bool hv_is_hyperv_initialized(void); > bool hv_is_hibernation_supported(void); > enum hv_isolation_type hv_get_isolation_type(void); > bool hv_is_isolation_supported(void); > +bool hv_isolation_type_snp(void); > void hyperv_cleanup(void); > #else /* CONFIG_HYPERV */ > static inline bool hv_is_hyperv_initialized(void) { return false; } > -- > 2.25.1 >