On 10/24/2014 11:18 AM, Tiejun Chen wrote:
> Instead of vmx_init(), it makes more sense to do anything specific to the
> VMX hardware setup in vmx_x86_ops->hardware_setup().
>
> Signed-off-by: Tiejun Chen <[email protected]>
Please split this patch into multiple parts. It is quite hard to review
this way.
Paolo
> ---
> arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
> 1 file changed, 361 insertions(+), 359 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 04fa1b8..9270076 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3106,10 +3106,302 @@ static __init int alloc_kvm_area(void)
> return 0;
> }
>
> +#define MSR_TYPE_R 1
> +#define MSR_TYPE_W 2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> + u32 msr, int type)
> +{
> + int f = sizeof(unsigned long);
> +
> + if (!cpu_has_vmx_msr_bitmap())
> + return;
> +
> + /*
> + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> + * have the write-low and read-high bitmap offsets the wrong way round.
> + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> + */
> + if (msr <= 0x1fff) {
> + if (type & MSR_TYPE_R)
> + /* read-low */
> + __clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-low */
> + __clear_bit(msr, msr_bitmap + 0x800 / f);
> +
> + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> + msr &= 0x1fff;
> + if (type & MSR_TYPE_R)
> + /* read-high */
> + __clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-high */
> + __clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> + }
> +}
> +
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> + u32 msr, int type)
> +{
> + int f = sizeof(unsigned long);
> +
> + if (!cpu_has_vmx_msr_bitmap())
> + return;
> +
> + /*
> + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> + * have the write-low and read-high bitmap offsets the wrong way round.
> + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> + */
> + if (msr <= 0x1fff) {
> + if (type & MSR_TYPE_R)
> + /* read-low */
> + __set_bit(msr, msr_bitmap + 0x000 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-low */
> + __set_bit(msr, msr_bitmap + 0x800 / f);
> +
> + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> + msr &= 0x1fff;
> + if (type & MSR_TYPE_R)
> + /* read-high */
> + __set_bit(msr, msr_bitmap + 0x400 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-high */
> + __set_bit(msr, msr_bitmap + 0xc00 / f);
> +
> + }
> +}
> +
> +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> +{
> + if (!longmode_only)
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> + msr, MSR_TYPE_R | MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> + msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +{
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +{
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +{
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_W);
> +}
> +
> +static int vmx_vm_has_apicv(struct kvm *kvm)
> +{
> + return enable_apicv && irqchip_in_kernel(kvm);
> +}
> +
> +static void ept_set_mmio_spte_mask(void)
> +{
> + /*
> + * EPT Misconfigurations can be generated if the value of bits 2:0
> + * of an EPT paging-structure entry is 110b (write/execute).
> + * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> + * spte.
> + */
> + kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> +}
> +
> +static int __grow_ple_window(int val)
> +{
> + if (ple_window_grow < 1)
> + return ple_window;
> +
> + val = min(val, ple_window_actual_max);
> +
> + if (ple_window_grow < ple_window)
> + val *= ple_window_grow;
> + else
> + val += ple_window_grow;
> +
> + return val;
> +}
> +
> +static int __shrink_ple_window(int val, int modifier, int minimum)
> +{
> + if (modifier < 1)
> + return ple_window;
> +
> + if (modifier < ple_window)
> + val /= modifier;
> + else
> + val -= modifier;
> +
> + return max(val, minimum);
> +}
> +
> +static void grow_ple_window(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + int old = vmx->ple_window;
> +
> + vmx->ple_window = __grow_ple_window(old);
> +
> + if (vmx->ple_window != old)
> + vmx->ple_window_dirty = true;
> +
> + trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +static void shrink_ple_window(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + int old = vmx->ple_window;
> +
> + vmx->ple_window = __shrink_ple_window(old,
> + ple_window_shrink, ple_window);
> +
> + if (vmx->ple_window != old)
> + vmx->ple_window_dirty = true;
> +
> + trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +/*
> + * ple_window_actual_max is computed to be one grow_ple_window() below
> + * ple_window_max. (See __grow_ple_window for the reason.)
> + * This prevents overflows, because ple_window_max is int.
> + * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> + * this process.
> + * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> + */
> +static void update_ple_window_actual_max(void)
> +{
> + ple_window_actual_max =
> + __shrink_ple_window(max(ple_window_max, ple_window),
> + ple_window_grow, INT_MIN);
> +}
> +
> +
> static __init int hardware_setup(void)
> {
> - if (setup_vmcs_config(&vmcs_config) < 0)
> - return -EIO;
> + int r = -ENOMEM, i, msr;
> +
> + rdmsrl_safe(MSR_EFER, &host_efer);
> +
> + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> + kvm_define_shared_msr(i, vmx_msr_index[i]);
> +
> + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_io_bitmap_a)
> + return r;
> +
> + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_io_bitmap_b)
> + goto out;
> +
> + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy)
> + goto out1;
> +
> + vmx_msr_bitmap_legacy_x2apic =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy_x2apic)
> + goto out2;
> +
> + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode)
> + goto out3;
> +
> + vmx_msr_bitmap_longmode_x2apic =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode_x2apic)
> + goto out4;
> + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_vmread_bitmap)
> + goto out5;
> +
> + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_vmwrite_bitmap)
> + goto out6;
> +
> + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> +
> + /*
> + * Allow direct access to the PC debug port (it is often used for I/O
> + * delays, but the vmexits simply slow things down).
> + */
> + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> + clear_bit(0x80, vmx_io_bitmap_a);
> +
> + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> +
> + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> +
> + vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> + vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> +
> + memcpy(vmx_msr_bitmap_legacy_x2apic,
> + vmx_msr_bitmap_legacy, PAGE_SIZE);
> + memcpy(vmx_msr_bitmap_longmode_x2apic,
> + vmx_msr_bitmap_longmode, PAGE_SIZE);
> +
> + if (enable_apicv) {
> + for (msr = 0x800; msr <= 0x8ff; msr++)
> + vmx_disable_intercept_msr_read_x2apic(msr);
> +
> + /* According SDM, in x2apic mode, the whole id reg is used.
> + * But in KVM, it only use the highest eight bits. Need to
> + * intercept it */
> + vmx_enable_intercept_msr_read_x2apic(0x802);
> + /* TMCCT */
> + vmx_enable_intercept_msr_read_x2apic(0x839);
> + /* TPR */
> + vmx_disable_intercept_msr_write_x2apic(0x808);
> + /* EOI */
> + vmx_disable_intercept_msr_write_x2apic(0x80b);
> + /* SELF-IPI */
> + vmx_disable_intercept_msr_write_x2apic(0x83f);
> + }
> +
> + if (enable_ept) {
> + kvm_mmu_set_mask_ptes(0ull,
> + (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> + (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> + 0ull, VMX_EPT_EXECUTABLE_MASK);
> + ept_set_mmio_spte_mask();
> + kvm_enable_tdp();
> + } else
> + kvm_disable_tdp();
> +
> + update_ple_window_actual_max();
> +
> + if (setup_vmcs_config(&vmcs_config) < 0) {
> + r = -EIO;
> + goto out7;
> + }
>
> if (boot_cpu_has(X86_FEATURE_NX))
> kvm_enable_efer_bits(EFER_NX);
> @@ -3169,10 +3461,38 @@ static __init int hardware_setup(void)
> nested_vmx_setup_ctls_msrs();
>
> return alloc_kvm_area();
> +
> +out7:
> + free_page((unsigned long)vmx_vmwrite_bitmap);
> +out6:
> + free_page((unsigned long)vmx_vmread_bitmap);
> +out5:
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +out4:
> + free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out3:
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +out2:
> + free_page((unsigned long)vmx_msr_bitmap_legacy);
> +out1:
> + free_page((unsigned long)vmx_io_bitmap_b);
> +out:
> + free_page((unsigned long)vmx_io_bitmap_a);
> +
> + return r;
> }
>
> static __exit void hardware_unsetup(void)
> {
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_legacy);
> + free_page((unsigned long)vmx_msr_bitmap_longmode);
> + free_page((unsigned long)vmx_io_bitmap_b);
> + free_page((unsigned long)vmx_io_bitmap_a);
> + free_page((unsigned long)vmx_vmwrite_bitmap);
> + free_page((unsigned long)vmx_vmread_bitmap);
> +
> free_kvm_area();
> }
>
> @@ -4057,162 +4377,52 @@ static int alloc_apic_access_page(struct kvm *kvm)
> kvm->arch.apic_access_page_done = true;
> out:
> mutex_unlock(&kvm->slots_lock);
> - return r;
> -}
> -
> -static int alloc_identity_pagetable(struct kvm *kvm)
> -{
> - /* Called with kvm->slots_lock held. */
> -
> - struct kvm_userspace_memory_region kvm_userspace_mem;
> - int r = 0;
> -
> - BUG_ON(kvm->arch.ept_identity_pagetable_done);
> -
> - kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> - kvm_userspace_mem.flags = 0;
> - kvm_userspace_mem.guest_phys_addr =
> - kvm->arch.ept_identity_map_addr;
> - kvm_userspace_mem.memory_size = PAGE_SIZE;
> - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> -
> - return r;
> -}
> -
> -static void allocate_vpid(struct vcpu_vmx *vmx)
> -{
> - int vpid;
> -
> - vmx->vpid = 0;
> - if (!enable_vpid)
> - return;
> - spin_lock(&vmx_vpid_lock);
> - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> - if (vpid < VMX_NR_VPIDS) {
> - vmx->vpid = vpid;
> - __set_bit(vpid, vmx_vpid_bitmap);
> - }
> - spin_unlock(&vmx_vpid_lock);
> -}
> -
> -static void free_vpid(struct vcpu_vmx *vmx)
> -{
> - if (!enable_vpid)
> - return;
> - spin_lock(&vmx_vpid_lock);
> - if (vmx->vpid != 0)
> - __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> - spin_unlock(&vmx_vpid_lock);
> -}
> -
> -#define MSR_TYPE_R 1
> -#define MSR_TYPE_W 2
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> - u32 msr, int type)
> -{
> - int f = sizeof(unsigned long);
> -
> - if (!cpu_has_vmx_msr_bitmap())
> - return;
> -
> - /*
> - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> - * have the write-low and read-high bitmap offsets the wrong way round.
> - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> - */
> - if (msr <= 0x1fff) {
> - if (type & MSR_TYPE_R)
> - /* read-low */
> - __clear_bit(msr, msr_bitmap + 0x000 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-low */
> - __clear_bit(msr, msr_bitmap + 0x800 / f);
> -
> - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> - msr &= 0x1fff;
> - if (type & MSR_TYPE_R)
> - /* read-high */
> - __clear_bit(msr, msr_bitmap + 0x400 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-high */
> - __clear_bit(msr, msr_bitmap + 0xc00 / f);
> -
> - }
> -}
> -
> -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> - u32 msr, int type)
> -{
> - int f = sizeof(unsigned long);
> -
> - if (!cpu_has_vmx_msr_bitmap())
> - return;
> -
> - /*
> - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> - * have the write-low and read-high bitmap offsets the wrong way round.
> - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> - */
> - if (msr <= 0x1fff) {
> - if (type & MSR_TYPE_R)
> - /* read-low */
> - __set_bit(msr, msr_bitmap + 0x000 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-low */
> - __set_bit(msr, msr_bitmap + 0x800 / f);
> -
> - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> - msr &= 0x1fff;
> - if (type & MSR_TYPE_R)
> - /* read-high */
> - __set_bit(msr, msr_bitmap + 0x400 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-high */
> - __set_bit(msr, msr_bitmap + 0xc00 / f);
> -
> - }
> -}
> -
> -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> -{
> - if (!longmode_only)
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> - msr, MSR_TYPE_R | MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> - msr, MSR_TYPE_R | MSR_TYPE_W);
> -}
> -
> -static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> -{
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + return r;
> }
>
> -static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +static int alloc_identity_pagetable(struct kvm *kvm)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + /* Called with kvm->slots_lock held. */
> +
> + struct kvm_userspace_memory_region kvm_userspace_mem;
> + int r = 0;
> +
> + BUG_ON(kvm->arch.ept_identity_pagetable_done);
> +
> + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> + kvm_userspace_mem.flags = 0;
> + kvm_userspace_mem.guest_phys_addr =
> + kvm->arch.ept_identity_map_addr;
> + kvm_userspace_mem.memory_size = PAGE_SIZE;
> + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> +
> + return r;
> }
>
> -static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +static void allocate_vpid(struct vcpu_vmx *vmx)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_W);
> + int vpid;
> +
> + vmx->vpid = 0;
> + if (!enable_vpid)
> + return;
> + spin_lock(&vmx_vpid_lock);
> + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> + if (vpid < VMX_NR_VPIDS) {
> + vmx->vpid = vpid;
> + __set_bit(vpid, vmx_vpid_bitmap);
> + }
> + spin_unlock(&vmx_vpid_lock);
> }
>
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> +static void free_vpid(struct vcpu_vmx *vmx)
> {
> - return enable_apicv && irqchip_in_kernel(kvm);
> + if (!enable_vpid)
> + return;
> + spin_lock(&vmx_vpid_lock);
> + if (vmx->vpid != 0)
> + __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> + spin_unlock(&vmx_vpid_lock);
> }
>
> /*
> @@ -4376,17 +4586,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> return exec_control;
> }
>
> -static void ept_set_mmio_spte_mask(void)
> -{
> - /*
> - * EPT Misconfigurations can be generated if the value of bits 2:0
> - * of an EPT paging-structure entry is 110b (write/execute).
> - * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> - * spte.
> - */
> - kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> -}
> -
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -5706,76 +5905,6 @@ out:
> return ret;
> }
>
> -static int __grow_ple_window(int val)
> -{
> - if (ple_window_grow < 1)
> - return ple_window;
> -
> - val = min(val, ple_window_actual_max);
> -
> - if (ple_window_grow < ple_window)
> - val *= ple_window_grow;
> - else
> - val += ple_window_grow;
> -
> - return val;
> -}
> -
> -static int __shrink_ple_window(int val, int modifier, int minimum)
> -{
> - if (modifier < 1)
> - return ple_window;
> -
> - if (modifier < ple_window)
> - val /= modifier;
> - else
> - val -= modifier;
> -
> - return max(val, minimum);
> -}
> -
> -static void grow_ple_window(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> - int old = vmx->ple_window;
> -
> - vmx->ple_window = __grow_ple_window(old);
> -
> - if (vmx->ple_window != old)
> - vmx->ple_window_dirty = true;
> -
> - trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -static void shrink_ple_window(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> - int old = vmx->ple_window;
> -
> - vmx->ple_window = __shrink_ple_window(old,
> - ple_window_shrink, ple_window);
> -
> - if (vmx->ple_window != old)
> - vmx->ple_window_dirty = true;
> -
> - trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -/*
> - * ple_window_actual_max is computed to be one grow_ple_window() below
> - * ple_window_max. (See __grow_ple_window for the reason.)
> - * This prevents overflows, because ple_window_max is int.
> - * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> - * this process.
> - * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> - */
> -static void update_ple_window_actual_max(void)
> -{
> - ple_window_actual_max =
> - __shrink_ple_window(max(ple_window_max, ple_window),
> - ple_window_grow, INT_MIN);
> -}
> -
> /*
> * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
> * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
> @@ -9158,150 +9287,23 @@ static struct kvm_x86_ops vmx_x86_ops = {
>
> static int __init vmx_init(void)
> {
> - int r, i, msr;
> -
> - rdmsrl_safe(MSR_EFER, &host_efer);
> -
> - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> - kvm_define_shared_msr(i, vmx_msr_index[i]);
> -
> - vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_io_bitmap_a)
> - return -ENOMEM;
> -
> - r = -ENOMEM;
> -
> - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_io_bitmap_b)
> - goto out;
> -
> - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_legacy)
> - goto out1;
> -
> - vmx_msr_bitmap_legacy_x2apic =
> - (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_legacy_x2apic)
> - goto out2;
> -
> - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_longmode)
> - goto out3;
> -
> - vmx_msr_bitmap_longmode_x2apic =
> - (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_longmode_x2apic)
> - goto out4;
> - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_vmread_bitmap)
> - goto out5;
> -
> - vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_vmwrite_bitmap)
> - goto out6;
> -
> - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> - memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> -
> - /*
> - * Allow direct access to the PC debug port (it is often used for I/O
> - * delays, but the vmexits simply slow things down).
> - */
> - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> - clear_bit(0x80, vmx_io_bitmap_a);
> -
> - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> -
> - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> -
> - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> + int r = -ENOMEM;
>
> r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
> __alignof__(struct vcpu_vmx), THIS_MODULE);
> if (r)
> - goto out7;
> + return r;
>
> #ifdef CONFIG_KEXEC
> rcu_assign_pointer(crash_vmclear_loaded_vmcss,
> crash_vmclear_local_loaded_vmcss);
> #endif
>
> - vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> - vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> -
> - memcpy(vmx_msr_bitmap_legacy_x2apic,
> - vmx_msr_bitmap_legacy, PAGE_SIZE);
> - memcpy(vmx_msr_bitmap_longmode_x2apic,
> - vmx_msr_bitmap_longmode, PAGE_SIZE);
> -
> - if (enable_apicv) {
> - for (msr = 0x800; msr <= 0x8ff; msr++)
> - vmx_disable_intercept_msr_read_x2apic(msr);
> -
> - /* According SDM, in x2apic mode, the whole id reg is used.
> - * But in KVM, it only use the highest eight bits. Need to
> - * intercept it */
> - vmx_enable_intercept_msr_read_x2apic(0x802);
> - /* TMCCT */
> - vmx_enable_intercept_msr_read_x2apic(0x839);
> - /* TPR */
> - vmx_disable_intercept_msr_write_x2apic(0x808);
> - /* EOI */
> - vmx_disable_intercept_msr_write_x2apic(0x80b);
> - /* SELF-IPI */
> - vmx_disable_intercept_msr_write_x2apic(0x83f);
> - }
> -
> - if (enable_ept) {
> - kvm_mmu_set_mask_ptes(0ull,
> - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> - (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> - 0ull, VMX_EPT_EXECUTABLE_MASK);
> - ept_set_mmio_spte_mask();
> - kvm_enable_tdp();
> - } else
> - kvm_disable_tdp();
> -
> - update_ple_window_actual_max();
> -
> return 0;
> -
> -out7:
> - free_page((unsigned long)vmx_vmwrite_bitmap);
> -out6:
> - free_page((unsigned long)vmx_vmread_bitmap);
> -out5:
> - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -out4:
> - free_page((unsigned long)vmx_msr_bitmap_longmode);
> -out3:
> - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -out2:
> - free_page((unsigned long)vmx_msr_bitmap_legacy);
> -out1:
> - free_page((unsigned long)vmx_io_bitmap_b);
> -out:
> - free_page((unsigned long)vmx_io_bitmap_a);
> - return r;
> }
>
> static void __exit vmx_exit(void)
> {
> - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> - free_page((unsigned long)vmx_msr_bitmap_legacy);
> - free_page((unsigned long)vmx_msr_bitmap_longmode);
> - free_page((unsigned long)vmx_io_bitmap_b);
> - free_page((unsigned long)vmx_io_bitmap_a);
> - free_page((unsigned long)vmx_vmwrite_bitmap);
> - free_page((unsigned long)vmx_vmread_bitmap);
> -
> #ifdef CONFIG_KEXEC
> RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
> synchronize_rcu();
>