Early prototype.

When running nested KVM on Hyper-V, it is possible to use the so-called
'Enlightened VMCS' and do normal memory reads/writes instead of
executing VMWRITE/VMREAD instructions. Tests show that this speeds up
a tight CPUID loop almost 3 times:

Before:
./cpuid_tight
20459

After:
./cpuid_tight
7698

checkpatch.pl errors/warnings and 32-bit brokenness are known issues.

Main RFC questions I have are:
- Do we want to have this per L2 VM or per L1 host?
- How can we achieve zero overhead for non-Hyper-V deployments? Use static
  keys? But this will only work if we decide to do eVMCS per host.
- Can we do better than a big switch in evmcs_read()/evmcs_write()? And
  preferably avoid the 'case' defines, which checkpatch.pl hates.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
 arch/x86/kvm/vmx.c | 595 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 593 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index efff9d035543..dfdfd15c3d60 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,6 +51,7 @@
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
 #include <asm/intel_pt.h>
+#include <asm/mshyperv.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -198,6 +199,9 @@ extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
 
+/*
+ * When true, vmcs_load() and the vmcs_read*()/vmcs_write*() accessors
+ * touched by this patch go through the Hyper-V Enlightened VMCS helpers
+ * instead of their VMX-instruction paths.  Exposed as a module parameter
+ * with mode 0444; defaults to enabled.
+ */
+static bool __read_mostly enlightened_vmcs = true;
+module_param(enlightened_vmcs, bool, 0444);
+
 struct vmcs {
        u32 revision_id;
        u32 abort;
@@ -1498,11 +1502,22 @@ static inline void loaded_vmcs_init(struct loaded_vmcs 
*loaded_vmcs)
        loaded_vmcs->launched = 0;
 }
 
+/*
+ * Make @phys_addr the current enlightened VMCS of the executing CPU by
+ * recording it in that CPU's hv_vp_assist_page entry and setting
+ * enlighten_vmentry.  vmcs_load() calls this instead of issuing VMPTRLD
+ * when enlightened_vmcs is set.
+ *
+ * NOTE(review): hv_vp_assist_page[cpu] is dereferenced unconditionally;
+ * presumably the page must already be set up on every CPU that gets here
+ * -- confirm, in particular for hosts that are not running on Hyper-V.
+ */
+static inline void vmcs_load_enlightened(u64 phys_addr)
+{
+       int cpu = smp_processor_id();
+
+       hv_vp_assist_page[cpu]->current_nested_vmcs = phys_addr;
+       hv_vp_assist_page[cpu]->enlighten_vmentry = 1;
+}
+
 static void vmcs_load(struct vmcs *vmcs)
 {
        u64 phys_addr = __pa(vmcs);
        u8 error;
 
+       if (enlightened_vmcs)
+               return vmcs_load_enlightened(phys_addr);
+
        asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
                        : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
                        : "cc", "memory");
@@ -1620,6 +1635,514 @@ static inline void ept_sync_context(u64 eptp)
                ept_sync_global();
 }
 
+/*
+ *  Enlightened VMCSv1 doesn't support these:
+ *     POSTED_INTR_NV                  = 0x00000002,
+ *     GUEST_INTR_STATUS               = 0x00000810,
+ *     GUEST_PML_INDEX                 = 0x00000812,
+ *     IO_BITMAP_A_HIGH                = 0x00002001,
+ *     IO_BITMAP_B_HIGH                = 0x00002003,
+ *     MSR_BITMAP_HIGH                 = 0x00002005,
+ *     VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+ *     VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+ *     VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+ *     PML_ADDRESS                     = 0x0000200e,
+ *     PML_ADDRESS_HIGH                = 0x0000200f,
+ *     TSC_OFFSET_HIGH                 = 0x00002011,
+ *     VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+ *     APIC_ACCESS_ADDR                = 0x00002014,
+ *     APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+ *     POSTED_INTR_DESC_ADDR           = 0x00002016,
+ *     POSTED_INTR_DESC_ADDR_HIGH      = 0x00002017,
+ *     VM_FUNCTION_CONTROL             = 0x00002018,
+ *     VM_FUNCTION_CONTROL_HIGH        = 0x00002019,
+ *     EPT_POINTER_HIGH                = 0x0000201b,
+ *     EOI_EXIT_BITMAP0                = 0x0000201c,
+ *     EOI_EXIT_BITMAP0_HIGH           = 0x0000201d,
+ *     EOI_EXIT_BITMAP1                = 0x0000201e,
+ *     EOI_EXIT_BITMAP1_HIGH           = 0x0000201f,
+ *     EOI_EXIT_BITMAP2                = 0x00002020,
+ *     EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
+ *     EOI_EXIT_BITMAP3                = 0x00002022,
+ *     EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+ *     EPTP_LIST_ADDRESS               = 0x00002024,
+ *     EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
+ *     VMREAD_BITMAP                   = 0x00002026,
+ *     VMWRITE_BITMAP                  = 0x00002028,
+ *     XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
+ *     TSC_MULTIPLIER                  = 0x00002032,
+ *     TSC_MULTIPLIER_HIGH             = 0x00002033,
+ *     GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+ *     VMCS_LINK_POINTER_HIGH          = 0x00002801,
+ *     GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+ *     GUEST_IA32_PAT_HIGH             = 0x00002805,
+ *     GUEST_IA32_EFER_HIGH            = 0x00002807,
+ *     GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
+ *     GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ *     GUEST_PDPTR0_HIGH               = 0x0000280b,
+ *     GUEST_PDPTR1_HIGH               = 0x0000280d,
+ *     GUEST_PDPTR2_HIGH               = 0x0000280f,
+ *     GUEST_PDPTR3_HIGH               = 0x00002811,
+ *     GUEST_BNDCFGS_HIGH              = 0x00002813,
+ *     GUEST_IA32_RTIT_CTL             = 0x00002814,
+ *     GUEST_IA32_RTIT_CTL_HIGH        = 0x00002815,
+ *     HOST_IA32_PAT_HIGH              = 0x00002c01,
+ *     HOST_IA32_EFER_HIGH             = 0x00002c03,
+ *     HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
+ *     HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ *     VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+ *     VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+ *     VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+ *     PLE_GAP                         = 0x00004020,
+ *     PLE_WINDOW                      = 0x00004022,
+ *     VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+ */
+
+/*
+ * Helpers that each expand to one 'case' of the switch statements in
+ * evmcs_write() / evmcs_read().  They rely on the local 'evmcs' (and, for
+ * the write variant, 'value') of the function they are expanded in.
+ *
+ * evmcs_write_field() stores 'value' into evmcs->efield and clears the
+ * bits of 'mask' in evmcs->hv_clean_fields.  The mask argument is
+ * parenthesized so that a compound expression (e.g. A | B) is negated as
+ * a whole.
+ */
+#define evmcs_write_field(field, efield, mask)         \
+       case field:                                     \
+       evmcs->efield = value;                          \
+       evmcs->hv_clean_fields &= ~(mask);              \
+       break;
+
+/* evmcs_read_field() returns evmcs->efield from the enclosing function. */
+#define evmcs_read_field(field, efield)                        \
+       case field:                                     \
+       return evmcs->efield;
+
+/*
+ * Write @value to the enlightened VMCS member that corresponds to the
+ * VMCS field encoding @field.
+ *
+ * The enlightened VMCS is located through the executing CPU's
+ * hv_vp_assist_page entry (current_nested_vmcs, a physical address that
+ * is mapped with __va()).  Each case is generated by evmcs_write_field(),
+ * which also clears the given mask in evmcs->hv_clean_fields --
+ * presumably so the hypervisor reloads that group of fields; confirm
+ * against the Hyper-V TLFS.  Fields for which the spec defines no
+ * clean-field group use the literal mask 0xffff.
+ *
+ * An unknown @field is reported with pr_err() and otherwise ignored.
+ *
+ * NOTE(review): VM_EXIT_MSR_STORE_COUNT, VM_EXIT_MSR_LOAD_COUNT and
+ * VM_ENTRY_MSR_LOAD_COUNT are handled below although the comment listing
+ * the fields Enlightened VMCSv1 does not support also names them; one of
+ * the two is stale.
+ */
+static void evmcs_write(unsigned long field, u64 value)
+{
+       int cpu = smp_processor_id();
+       struct hv_enlightened_vmcs *evmcs =
+               __va(hv_vp_assist_page[cpu]->current_nested_vmcs);
+
+       switch (field) {
+               /* 64 bit fields */
+               evmcs_write_field(GUEST_RIP, guest_rip,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+               evmcs_write_field(GUEST_RSP, guest_rsp,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC);
+               evmcs_write_field(GUEST_RFLAGS, guest_rflags,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC);
+               evmcs_write_field(HOST_IA32_PAT, host_ia32_pat,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_IA32_EFER, host_ia32_efer,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_CR0, host_cr0,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_CR3, host_cr3,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_CR4, host_cr4,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_IA32_SYSENTER_ESP,
+                                 host_ia32_sysenter_esp,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_IA32_SYSENTER_EIP,
+                                 host_ia32_sysenter_eip,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_RIP, host_rip,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(IO_BITMAP_A, io_bitmap_a,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP);
+               evmcs_write_field(IO_BITMAP_B, io_bitmap_b,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP);
+               evmcs_write_field(MSR_BITMAP, msr_bitmap,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP);
+               evmcs_write_field(GUEST_ES_BASE, guest_es_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_CS_BASE, guest_cs_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_SS_BASE, guest_ss_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_DS_BASE, guest_ds_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_FS_BASE, guest_fs_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_GS_BASE, guest_gs_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_LDTR_BASE, guest_ldtr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_TR_BASE, guest_tr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_GDTR_BASE, guest_gdtr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_IDTR_BASE, guest_idtr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(TSC_OFFSET, tsc_offset,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2);
+               evmcs_write_field(VIRTUAL_APIC_PAGE_ADDR,
+                                 virtual_apic_page_addr,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2);
+               evmcs_write_field(VMCS_LINK_POINTER, vmcs_link_pointer,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_IA32_PAT, guest_ia32_pat,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_IA32_EFER, guest_ia32_efer,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_PDPTR0, guest_pdptr0,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_PDPTR1, guest_pdptr1,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_PDPTR2, guest_pdptr2,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_PDPTR3, guest_pdptr3,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_PENDING_DBG_EXCEPTIONS,
+                                 guest_pending_dbg_exceptions,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_SYSENTER_ESP, guest_sysenter_esp,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_SYSENTER_EIP, guest_sysenter_eip,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(CR0_GUEST_HOST_MASK, cr0_guest_host_mask,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(CR4_GUEST_HOST_MASK, cr4_guest_host_mask,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(CR0_READ_SHADOW, cr0_read_shadow,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(CR4_READ_SHADOW, cr4_read_shadow,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(GUEST_CR0, guest_cr0,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(GUEST_CR3, guest_cr3,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(GUEST_CR4, guest_cr4,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(GUEST_DR7, guest_dr7,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR);
+               evmcs_write_field(HOST_FS_BASE, host_fs_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER);
+               evmcs_write_field(HOST_GS_BASE, host_gs_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER);
+               evmcs_write_field(HOST_TR_BASE, host_tr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER);
+               evmcs_write_field(HOST_GDTR_BASE, host_gdtr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER);
+               evmcs_write_field(HOST_IDTR_BASE, host_idtr_base,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER);
+               evmcs_write_field(HOST_RSP, host_rsp,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER);
+               evmcs_write_field(EPT_POINTER, ept_pointer,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT);
+               evmcs_write_field(GUEST_BNDCFGS, guest_bndcfgs,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(XSS_EXIT_BITMAP, xss_exit_bitmap,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2);
+               /* no mask defined in the spec */
+               evmcs_write_field(VM_EXIT_MSR_STORE_ADDR,
+                                 vm_exit_msr_store_addr, 0xffff);
+               evmcs_write_field(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr,
+                                 0xffff);
+               evmcs_write_field(VM_ENTRY_MSR_LOAD_ADDR,
+                                 vm_entry_msr_load_addr, 0xffff);
+               evmcs_write_field(CR3_TARGET_VALUE0, cr3_target_value0, 0xffff);
+               evmcs_write_field(CR3_TARGET_VALUE1, cr3_target_value1, 0xffff);
+               evmcs_write_field(CR3_TARGET_VALUE2, cr3_target_value2, 0xffff);
+               evmcs_write_field(CR3_TARGET_VALUE3, cr3_target_value3, 0xffff);
+
+               /* 32 bit fields */
+               evmcs_write_field(TPR_THRESHOLD, tpr_threshold,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+               evmcs_write_field(GUEST_INTERRUPTIBILITY_INFO,
+                                 guest_interruptibility_info,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC);
+               evmcs_write_field(CPU_BASED_VM_EXEC_CONTROL,
+                                 cpu_based_vm_exec_control,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC);
+               evmcs_write_field(EXCEPTION_BITMAP, exception_bitmap,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN);
+               evmcs_write_field(VM_ENTRY_CONTROLS, vm_entry_controls,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY);
+               evmcs_write_field(VM_ENTRY_INTR_INFO_FIELD,
+                                 vm_entry_intr_info_field,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT);
+               evmcs_write_field(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                                 vm_entry_exception_error_code,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT);
+               evmcs_write_field(VM_ENTRY_INSTRUCTION_LEN,
+                                 vm_entry_instruction_len,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT);
+               evmcs_write_field(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(PIN_BASED_VM_EXEC_CONTROL,
+                                 pin_based_vm_exec_control,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1);
+               evmcs_write_field(VM_EXIT_CONTROLS, vm_exit_controls,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1);
+               evmcs_write_field(SECONDARY_VM_EXEC_CONTROL,
+                                 secondary_vm_exec_control,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1);
+               evmcs_write_field(GUEST_ES_LIMIT, guest_es_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_CS_LIMIT, guest_cs_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_SS_LIMIT, guest_ss_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_DS_LIMIT, guest_ds_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_FS_LIMIT, guest_fs_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_GS_LIMIT, guest_gs_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_LDTR_LIMIT, guest_ldtr_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_TR_LIMIT, guest_tr_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_GDTR_LIMIT, guest_gdtr_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_IDTR_LIMIT, guest_idtr_limit,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_ES_AR_BYTES, guest_es_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_CS_AR_BYTES, guest_cs_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_SS_AR_BYTES, guest_ss_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_DS_AR_BYTES, guest_ds_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_FS_AR_BYTES, guest_fs_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_GS_AR_BYTES, guest_gs_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_TR_AR_BYTES, guest_tr_ar_bytes,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_ACTIVITY_STATE, guest_activity_state,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               evmcs_write_field(GUEST_SYSENTER_CS, guest_sysenter_cs,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+               /* no mask defined in the spec */
+               evmcs_write_field(PAGE_FAULT_ERROR_CODE_MASK,
+                                 page_fault_error_code_mask, 0xffff);
+               evmcs_write_field(PAGE_FAULT_ERROR_CODE_MATCH,
+                                 page_fault_error_code_match, 0xffff);
+               evmcs_write_field(CR3_TARGET_COUNT, cr3_target_count,
+                                 0xffff);
+               evmcs_write_field(VM_EXIT_MSR_STORE_COUNT,
+                                 vm_exit_msr_store_count, 0xffff);
+               evmcs_write_field(VM_EXIT_MSR_LOAD_COUNT,
+                                 vm_exit_msr_load_count, 0xffff);
+               evmcs_write_field(VM_ENTRY_MSR_LOAD_COUNT,
+                                 vm_entry_msr_load_count, 0xffff);
+
+               /* 16 bit fields */
+               evmcs_write_field(HOST_ES_SELECTOR, host_es_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_CS_SELECTOR, host_cs_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_SS_SELECTOR, host_ss_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_DS_SELECTOR, host_ds_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_FS_SELECTOR, host_fs_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_GS_SELECTOR, host_gs_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(HOST_TR_SELECTOR, host_tr_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1);
+               evmcs_write_field(GUEST_ES_SELECTOR, guest_es_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_CS_SELECTOR, guest_cs_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_SS_SELECTOR, guest_ss_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_DS_SELECTOR, guest_ds_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_FS_SELECTOR, guest_fs_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_GS_SELECTOR, guest_gs_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_LDTR_SELECTOR, guest_ldtr_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(GUEST_TR_SELECTOR, guest_tr_selector,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2);
+               evmcs_write_field(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
+                                 HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT);
+       default:
+               pr_err("VMX: no EVMCS support write:0x%lx\n", field);
+       }
+}
+
+/*
+ * Return the value of the enlightened VMCS member that corresponds to
+ * the VMCS field encoding @field.
+ *
+ * The enlightened VMCS is located through the executing CPU's
+ * hv_vp_assist_page entry (current_nested_vmcs, a physical address that
+ * is mapped with __va()).  Each case is generated by evmcs_read_field()
+ * and returns directly, so 'default' is only reached for an encoding that
+ * has no case here: that is reported with pr_err() and 0 is returned.
+ */
+static u64 evmcs_read(unsigned long field)
+{
+       int cpu = smp_processor_id();
+       struct hv_enlightened_vmcs *evmcs =
+               __va(hv_vp_assist_page[cpu]->current_nested_vmcs);
+
+       switch (field) {
+               /* 64 bit fields */
+               evmcs_read_field(GUEST_RIP, guest_rip);
+               evmcs_read_field(GUEST_RSP, guest_rsp);
+               evmcs_read_field(GUEST_RFLAGS, guest_rflags);
+               evmcs_read_field(HOST_IA32_PAT, host_ia32_pat);
+               evmcs_read_field(HOST_IA32_EFER, host_ia32_efer);
+               evmcs_read_field(HOST_CR0, host_cr0);
+               evmcs_read_field(HOST_CR3, host_cr3);
+               evmcs_read_field(HOST_CR4, host_cr4);
+               evmcs_read_field(HOST_IA32_SYSENTER_ESP,
+                                 host_ia32_sysenter_esp);
+               evmcs_read_field(HOST_IA32_SYSENTER_EIP,
+                                 host_ia32_sysenter_eip);
+               evmcs_read_field(HOST_RIP, host_rip);
+               evmcs_read_field(IO_BITMAP_A, io_bitmap_a);
+               evmcs_read_field(IO_BITMAP_B, io_bitmap_b);
+               evmcs_read_field(MSR_BITMAP, msr_bitmap);
+               evmcs_read_field(GUEST_ES_BASE, guest_es_base);
+               evmcs_read_field(GUEST_CS_BASE, guest_cs_base);
+               evmcs_read_field(GUEST_SS_BASE, guest_ss_base);
+               evmcs_read_field(GUEST_DS_BASE, guest_ds_base);
+               evmcs_read_field(GUEST_FS_BASE, guest_fs_base);
+               evmcs_read_field(GUEST_GS_BASE, guest_gs_base);
+               evmcs_read_field(GUEST_LDTR_BASE, guest_ldtr_base);
+               evmcs_read_field(GUEST_TR_BASE, guest_tr_base);
+               evmcs_read_field(GUEST_GDTR_BASE, guest_gdtr_base);
+               evmcs_read_field(GUEST_IDTR_BASE, guest_idtr_base);
+               evmcs_read_field(TSC_OFFSET, tsc_offset);
+               evmcs_read_field(VIRTUAL_APIC_PAGE_ADDR,
+                                virtual_apic_page_addr);
+               evmcs_read_field(VMCS_LINK_POINTER, vmcs_link_pointer);
+               evmcs_read_field(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl);
+               evmcs_read_field(GUEST_IA32_PAT, guest_ia32_pat);
+               evmcs_read_field(GUEST_IA32_EFER, guest_ia32_efer);
+               evmcs_read_field(GUEST_PDPTR0, guest_pdptr0);
+               evmcs_read_field(GUEST_PDPTR1, guest_pdptr1);
+               evmcs_read_field(GUEST_PDPTR2, guest_pdptr2);
+               evmcs_read_field(GUEST_PDPTR3, guest_pdptr3);
+               evmcs_read_field(GUEST_PENDING_DBG_EXCEPTIONS,
+                                 guest_pending_dbg_exceptions);
+               evmcs_read_field(GUEST_SYSENTER_ESP, guest_sysenter_esp);
+               evmcs_read_field(GUEST_SYSENTER_EIP, guest_sysenter_eip);
+               evmcs_read_field(CR0_GUEST_HOST_MASK, cr0_guest_host_mask);
+               evmcs_read_field(CR4_GUEST_HOST_MASK, cr4_guest_host_mask);
+               evmcs_read_field(CR0_READ_SHADOW, cr0_read_shadow);
+               evmcs_read_field(CR4_READ_SHADOW, cr4_read_shadow);
+               evmcs_read_field(GUEST_CR0, guest_cr0);
+               evmcs_read_field(GUEST_CR3, guest_cr3);
+               evmcs_read_field(GUEST_CR4, guest_cr4);
+               evmcs_read_field(GUEST_DR7, guest_dr7);
+               evmcs_read_field(HOST_FS_BASE, host_fs_base);
+               evmcs_read_field(HOST_GS_BASE, host_gs_base);
+               evmcs_read_field(HOST_TR_BASE, host_tr_base);
+               evmcs_read_field(HOST_GDTR_BASE, host_gdtr_base);
+               evmcs_read_field(HOST_IDTR_BASE, host_idtr_base);
+               evmcs_read_field(HOST_RSP, host_rsp);
+               evmcs_read_field(EPT_POINTER, ept_pointer);
+               evmcs_read_field(GUEST_BNDCFGS, guest_bndcfgs);
+               evmcs_read_field(XSS_EXIT_BITMAP, xss_exit_bitmap);
+               evmcs_read_field(GUEST_PHYSICAL_ADDRESS,
+                                guest_physical_address);
+               evmcs_read_field(EXIT_QUALIFICATION, exit_qualification);
+               /*
+                * Not implemented in KVM:
+                * evmcs_read_field(0x00006402, exit_io_instruction_ecx);
+                * evmcs_read_field(0x00006404, exit_io_instruction_esi);
+                * evmcs_read_field(0x00006406, exit_io_instruction_edi);
+                * evmcs_read_field(0x00006408, exit_io_instruction_eip);
+                */
+               evmcs_read_field(GUEST_LINEAR_ADDRESS, guest_linear_address);
+
+               /* no mask defined in the spec */
+               evmcs_read_field(VM_EXIT_MSR_STORE_ADDR,
+                                vm_exit_msr_store_addr);
+               evmcs_read_field(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr);
+               evmcs_read_field(VM_ENTRY_MSR_LOAD_ADDR,
+                                vm_entry_msr_load_addr);
+               evmcs_read_field(CR3_TARGET_VALUE0, cr3_target_value0);
+               evmcs_read_field(CR3_TARGET_VALUE1, cr3_target_value1);
+               evmcs_read_field(CR3_TARGET_VALUE2, cr3_target_value2);
+               evmcs_read_field(CR3_TARGET_VALUE3, cr3_target_value3);
+
+               /* 32 bit fields */
+               evmcs_read_field(TPR_THRESHOLD, tpr_threshold);
+               evmcs_read_field(GUEST_INTERRUPTIBILITY_INFO,
+                                 guest_interruptibility_info);
+               evmcs_read_field(CPU_BASED_VM_EXEC_CONTROL,
+                                 cpu_based_vm_exec_control);
+               evmcs_read_field(EXCEPTION_BITMAP, exception_bitmap);
+               evmcs_read_field(VM_ENTRY_CONTROLS, vm_entry_controls);
+               evmcs_read_field(VM_ENTRY_INTR_INFO_FIELD,
+                                 vm_entry_intr_info_field);
+               evmcs_read_field(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                                 vm_entry_exception_error_code);
+               evmcs_read_field(VM_ENTRY_INSTRUCTION_LEN,
+                                 vm_entry_instruction_len);
+               evmcs_read_field(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs);
+               evmcs_read_field(PIN_BASED_VM_EXEC_CONTROL,
+                                 pin_based_vm_exec_control);
+               evmcs_read_field(VM_EXIT_CONTROLS, vm_exit_controls);
+               evmcs_read_field(SECONDARY_VM_EXEC_CONTROL,
+                                 secondary_vm_exec_control);
+               evmcs_read_field(GUEST_ES_LIMIT, guest_es_limit);
+               evmcs_read_field(GUEST_CS_LIMIT, guest_cs_limit);
+               evmcs_read_field(GUEST_SS_LIMIT, guest_ss_limit);
+               evmcs_read_field(GUEST_DS_LIMIT, guest_ds_limit);
+               evmcs_read_field(GUEST_FS_LIMIT, guest_fs_limit);
+               evmcs_read_field(GUEST_GS_LIMIT, guest_gs_limit);
+               evmcs_read_field(GUEST_LDTR_LIMIT, guest_ldtr_limit);
+               evmcs_read_field(GUEST_TR_LIMIT, guest_tr_limit);
+               evmcs_read_field(GUEST_GDTR_LIMIT, guest_gdtr_limit);
+               evmcs_read_field(GUEST_IDTR_LIMIT, guest_idtr_limit);
+               evmcs_read_field(GUEST_ES_AR_BYTES, guest_es_ar_bytes);
+               evmcs_read_field(GUEST_CS_AR_BYTES, guest_cs_ar_bytes);
+               evmcs_read_field(GUEST_SS_AR_BYTES, guest_ss_ar_bytes);
+               evmcs_read_field(GUEST_DS_AR_BYTES, guest_ds_ar_bytes);
+               evmcs_read_field(GUEST_FS_AR_BYTES, guest_fs_ar_bytes);
+               evmcs_read_field(GUEST_GS_AR_BYTES, guest_gs_ar_bytes);
+               evmcs_read_field(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes);
+               evmcs_read_field(GUEST_TR_AR_BYTES, guest_tr_ar_bytes);
+               evmcs_read_field(GUEST_ACTIVITY_STATE, guest_activity_state);
+               evmcs_read_field(GUEST_SYSENTER_CS, guest_sysenter_cs);
+               evmcs_read_field(VM_INSTRUCTION_ERROR, vm_instruction_error);
+               evmcs_read_field(VM_EXIT_REASON, vm_exit_reason);
+               evmcs_read_field(VM_EXIT_INTR_INFO, vm_exit_intr_info);
+               evmcs_read_field(VM_EXIT_INTR_ERROR_CODE,
+                                vm_exit_intr_error_code);
+               evmcs_read_field(IDT_VECTORING_INFO_FIELD,
+                                idt_vectoring_info_field);
+               evmcs_read_field(IDT_VECTORING_ERROR_CODE,
+                                idt_vectoring_error_code);
+               evmcs_read_field(VM_EXIT_INSTRUCTION_LEN,
+                                vm_exit_instruction_len);
+               evmcs_read_field(VMX_INSTRUCTION_INFO, vmx_instruction_info);
+               /* no mask defined in the spec */
+               evmcs_read_field(PAGE_FAULT_ERROR_CODE_MASK,
+                                page_fault_error_code_mask);
+               evmcs_read_field(PAGE_FAULT_ERROR_CODE_MATCH,
+                                page_fault_error_code_match);
+               evmcs_read_field(CR3_TARGET_COUNT, cr3_target_count);
+               evmcs_read_field(VM_EXIT_MSR_STORE_COUNT,
+                                vm_exit_msr_store_count);
+               evmcs_read_field(VM_EXIT_MSR_LOAD_COUNT,
+                                vm_exit_msr_load_count);
+               evmcs_read_field(VM_ENTRY_MSR_LOAD_COUNT,
+                                vm_entry_msr_load_count);
+
+               /* 16 bit fields */
+               evmcs_read_field(HOST_ES_SELECTOR, host_es_selector);
+               evmcs_read_field(HOST_CS_SELECTOR, host_cs_selector);
+               evmcs_read_field(HOST_SS_SELECTOR, host_ss_selector);
+               evmcs_read_field(HOST_DS_SELECTOR, host_ds_selector);
+               evmcs_read_field(HOST_FS_SELECTOR, host_fs_selector);
+               evmcs_read_field(HOST_GS_SELECTOR, host_gs_selector);
+               evmcs_read_field(HOST_TR_SELECTOR, host_tr_selector);
+               evmcs_read_field(GUEST_ES_SELECTOR, guest_es_selector);
+               evmcs_read_field(GUEST_CS_SELECTOR, guest_cs_selector);
+               evmcs_read_field(GUEST_SS_SELECTOR, guest_ss_selector);
+               evmcs_read_field(GUEST_DS_SELECTOR, guest_ds_selector);
+               evmcs_read_field(GUEST_FS_SELECTOR, guest_fs_selector);
+               evmcs_read_field(GUEST_GS_SELECTOR, guest_gs_selector);
+               evmcs_read_field(GUEST_LDTR_SELECTOR, guest_ldtr_selector);
+               evmcs_read_field(GUEST_TR_SELECTOR, guest_tr_selector);
+               evmcs_read_field(VIRTUAL_PROCESSOR_ID, virtual_processor_id);
+
+       default:
+               pr_err("VMX: no EVMCS support read:0x%lx\n", field);
+       }
+
+       return 0;
+}
+
 static __always_inline void vmcs_check16(unsigned long field)
 {
         BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
@@ -1676,18 +2199,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 static __always_inline u16 vmcs_read16(unsigned long field)
 {
        vmcs_check16(field);
+       if (enlightened_vmcs)
+               return evmcs_read(field);
        return __vmcs_readl(field);
 }
 
 static __always_inline u32 vmcs_read32(unsigned long field)
 {
        vmcs_check32(field);
+       if (enlightened_vmcs)
+               return evmcs_read(field);
        return __vmcs_readl(field);
 }
 
 static __always_inline u64 vmcs_read64(unsigned long field)
 {
        vmcs_check64(field);
+       if (enlightened_vmcs)
+               return evmcs_read(field);
 #ifdef CONFIG_X86_64
        return __vmcs_readl(field);
 #else
@@ -1698,6 +2227,8 @@ static __always_inline u64 vmcs_read64(unsigned long field)
 static __always_inline unsigned long vmcs_readl(unsigned long field)
 {
        vmcs_checkl(field);
+       if (enlightened_vmcs)
+               return evmcs_read(field);
        return __vmcs_readl(field);
 }
 
@@ -1721,18 +2252,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
 static __always_inline void vmcs_write16(unsigned long field, u16 value)
 {
        vmcs_check16(field);
+       if (enlightened_vmcs)
+               return evmcs_write(field, value);
+
        __vmcs_writel(field, value);
 }
 
 static __always_inline void vmcs_write32(unsigned long field, u32 value)
 {
        vmcs_check32(field);
+       if (enlightened_vmcs)
+               return evmcs_write(field, value);
+
        __vmcs_writel(field, value);
 }
 
 static __always_inline void vmcs_write64(unsigned long field, u64 value)
 {
        vmcs_check64(field);
+       if (enlightened_vmcs)
+               return evmcs_write(field, value);
+
        __vmcs_writel(field, value);
 #ifndef CONFIG_X86_64
        asm volatile ("");
@@ -1743,6 +2283,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
 static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
 {
        vmcs_checkl(field);
+       if (enlightened_vmcs)
+               return evmcs_write(field, value);
+
        __vmcs_writel(field, value);
 }
 
@@ -1750,6 +2293,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
 {
         BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
                         "vmcs_clear_bits does not support 64-bit fields");
+       if (enlightened_vmcs)
+               return evmcs_write(field, evmcs_read(field) & ~mask);
+
        __vmcs_writel(field, __vmcs_readl(field) & ~mask);
 }
 
@@ -1757,6 +2303,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 {
         BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
                         "vmcs_set_bits does not support 64-bit fields");
+       if (enlightened_vmcs)
+               return evmcs_write(field, evmcs_read(field) | mask);
+
        __vmcs_writel(field, __vmcs_readl(field) | mask);
 }
 
@@ -3891,7 +4440,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
        vmcs_conf->size = vmx_msr_high & 0x1fff;
        vmcs_conf->order = get_order(vmcs_conf->size);
        vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
-       vmcs_conf->revision_id = vmx_msr_low;
+
+       if (enlightened_vmcs)
+               vmcs_conf->revision_id = ms_hyperv.nested_features & 0xff;
+       else
+               vmcs_conf->revision_id = vmx_msr_low;
 
        vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
        vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
@@ -9520,6 +10073,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long cr3, cr4;
+       struct hv_enlightened_vmcs *evmcs = NULL;
 
        /* Record the guest's net vcpu time for enforced NMI injections. */
        if (unlikely(!enable_vnmi &&
@@ -9581,6 +10135,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx_arm_hv_timer(vcpu);
 
        vmx->__launched = vmx->loaded_vmcs->launched;
+
+       if (enlightened_vmcs) {
+               int cpu = smp_processor_id();
+
+               evmcs = __va(hv_vp_assist_page[cpu]->current_nested_vmcs);
+
+               /* Crude hack: put RSP-32 to enlightened VMCS host_rsp field */
+               asm volatile ("mov %%rsp, (%%rax); sub $32, (%%rax)" : :
+                             "a"(&evmcs->host_rsp));
+               vmx->host_rsp = evmcs->host_rsp;
+       }
        asm(
                /* Store host registers */
                "push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9686,6 +10251,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
              );
 
+       /* All fields are CLEAN */
+       if (evmcs)
+               evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
        if (have_spec_ctrl) {
                rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
                if (vmx->spec_ctrl)
@@ -12463,7 +13032,29 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 static int __init vmx_init(void)
 {
-       int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+       int r;
+
+#ifdef CONFIG_HYPERVISOR_GUEST
+       if (enlightened_vmcs &&
+           ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
+               int cpu;
+
+               /* check that we have assist pages on all CPUs */
+               for_each_online_cpu(cpu) {
+                       if (!hv_vp_assist_page[cpu]) {
+                               enlightened_vmcs = false;
+                               break;
+                       }
+               }
+
+               if (enlightened_vmcs)
+                       pr_info("VMX: using Hyper-V Enlightened VMCS\n");
+       } else {
+               enlightened_vmcs = false;
+       }
+#endif
+
+       r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                      __alignof__(struct vcpu_vmx), THIS_MODULE);
        if (r)
                return r;
-- 
2.14.3

Reply via email to