On Mon, Jan 14, 2013 at 03:13:34PM +0800, Yang Zhang wrote:
> From: Yang Zhang <[email protected]>
> 
> Basically, to benefit from APICv, we need to enable virtualized x2apic mode.
> Currently, we only enable it when the guest is really using x2apic.
> 
> Also, clear the MSR bitmap bits for the corresponding x2apic MSRs when the
> guest enables x2apic:
>     0x800 - 0x8ff: no read intercept for apicv register virtualization,
>                    except APIC ID and TMCCT.
>     APIC ID and TMCCT: need software's assistance to get right value.
> 
Actually, since the MSR bitmap is shared between all vcpus, this will break
guests that do not enable x2apic.

> Signed-off-by: Kevin Tian <[email protected]>
> Signed-off-by: Yang Zhang <[email protected]>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/include/asm/vmx.h      |    1 +
>  arch/x86/kvm/lapic.c            |   15 +++-
>  arch/x86/kvm/svm.c              |    6 ++
>  arch/x86/kvm/vmx.c              |  162 
> +++++++++++++++++++++++++++++++++++++--
>  5 files changed, 173 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c431b33..35aa8e6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -697,6 +697,7 @@ struct kvm_x86_ops {
>       void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
>       void (*enable_irq_window)(struct kvm_vcpu *vcpu);
>       void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
> +     void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>       int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
>       int (*get_tdp_level)(void);
>       u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 44c3f7e..0a54df0 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -139,6 +139,7 @@
>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
>  #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
>  #define SECONDARY_EXEC_RDTSCP                        0x00000008
> +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010
>  #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
>  #define SECONDARY_EXEC_WBINVD_EXITING                0x00000040
>  #define SECONDARY_EXEC_UNRESTRICTED_GUEST    0x00000080
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 0664c13..2ef5e2b 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1323,12 +1323,17 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 
> value)
>       if (!kvm_vcpu_is_bsp(apic->vcpu))
>               value &= ~MSR_IA32_APICBASE_BSP;
>  
> -     vcpu->arch.apic_base = value;
> -     if (apic_x2apic_mode(apic)) {
> -             u32 id = kvm_apic_id(apic);
> -             u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> -             kvm_apic_set_ldr(apic, ldr);
> +     if ((vcpu->arch.apic_base ^ value) & X2APIC_ENABLE) {
> +             if (value & X2APIC_ENABLE) {
> +                     u32 id = kvm_apic_id(apic);
> +                     u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> +                     kvm_apic_set_ldr(apic, ldr);
> +                     kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
> +             } else
> +                     kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
>       }
> +
> +     vcpu->arch.apic_base = value;
>       apic->base_address = apic->vcpu->arch.apic_base &
>                            MSR_IA32_APICBASE_BASE;
>  
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d29d3cd..38407e9 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu 
> *vcpu, int tpr, int irr)
>               set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>  }
>  
> +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> +{
> +     return;
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>       struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>       .enable_nmi_window = enable_nmi_window,
>       .enable_irq_window = enable_irq_window,
>       .update_cr8_intercept = update_cr8_intercept,
> +     .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
>  
>       .set_tss_addr = svm_set_tss_addr,
>       .get_tdp_level = get_npt_level,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 0403634..847022e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -767,6 +767,12 @@ static inline bool 
> cpu_has_vmx_virtualize_apic_accesses(void)
>               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>  }
>  
> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
> +{
> +     return vmcs_config.cpu_based_2nd_exec_ctrl &
> +             SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +}
> +
>  static inline bool cpu_has_vmx_apic_register_virt(void)
>  {
>       return vmcs_config.cpu_based_2nd_exec_ctrl &
> @@ -2543,6 +2549,7 @@ static __init int setup_vmcs_config(struct vmcs_config 
> *vmcs_conf)
>       if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
>               min2 = 0;
>               opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
> +                     SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>                       SECONDARY_EXEC_WBINVD_EXITING |
>                       SECONDARY_EXEC_ENABLE_VPID |
>                       SECONDARY_EXEC_ENABLE_EPT |
> @@ -3724,7 +3731,45 @@ static void free_vpid(struct vcpu_vmx *vmx)
>       spin_unlock(&vmx_vpid_lock);
>  }
>  
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 
> msr)
> +#define MSR_TYPE_R   1
> +#define MSR_TYPE_W   2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> +                                             u32 msr, int type)
> +{
> +     int f = sizeof(unsigned long);
> +
> +     if (!cpu_has_vmx_msr_bitmap())
> +             return;
> +
> +     /*
> +      * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> +      * have the write-low and read-high bitmap offsets the wrong way round.
> +      * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> +      */
> +     if (msr <= 0x1fff) {
> +             if (type & MSR_TYPE_R)
> +                     /* read-low */
> +                     __clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> +             if (type & MSR_TYPE_W)
> +                     /* write-low */
> +                     __clear_bit(msr, msr_bitmap + 0x800 / f);
> +
> +     } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> +             msr &= 0x1fff;
> +             if (type & MSR_TYPE_R)
> +                     /* read-high */
> +                     __clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> +             if (type & MSR_TYPE_W)
> +                     /* write-high */
> +                     __clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> +     }
> +}
> +
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> +                                             u32 msr, int type)
>  {
>       int f = sizeof(unsigned long);
>  
> @@ -3737,20 +3782,75 @@ static void __vmx_disable_intercept_for_msr(unsigned 
> long *msr_bitmap, u32 msr)
>        * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>        */
>       if (msr <= 0x1fff) {
> -             __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
> -             __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
> +             if (type & MSR_TYPE_R)
> +                     /* read-low */
> +                     __set_bit(msr, msr_bitmap + 0x000 / f);
> +
> +             if (type & MSR_TYPE_W)
> +                     /* write-low */
> +                     __set_bit(msr, msr_bitmap + 0x800 / f);
> +
>       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>               msr &= 0x1fff;
> -             __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
> -             __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
> +             if (type & MSR_TYPE_R)
> +                     /* read-high */
> +                     __set_bit(msr, msr_bitmap + 0x400 / f);
> +
> +             if (type & MSR_TYPE_W)
> +                     /* write-high */
> +                     __set_bit(msr, msr_bitmap + 0xc00 / f);
> +
>       }
>  }
>  
> +
>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>  {
>       if (!longmode_only)
> -             __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
> -     __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
> +             __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +                                             msr, MSR_TYPE_R | MSR_TYPE_W);
> +     __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +                                             msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_intercept_for_msr_read(u32 msr, bool longmode_only,
> +                                     bool set)
> +{
> +     if (!longmode_only) {
> +             if (set)
> +                     __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +                                     msr, MSR_TYPE_R);
> +             else
> +                     __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +                                     msr, MSR_TYPE_R);
> +
> +     }
> +     if (set)
> +             __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +                             msr, MSR_TYPE_R);
> +     else
> +             __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +                             msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_intercept_for_msr_write(u32 msr, bool longmode_only,
> +                                     bool set)
> +{
> +     if (!longmode_only) {
> +             if (set)
> +                     __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +                                     msr, MSR_TYPE_W);
> +             else
> +                     __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> +                                     msr, MSR_TYPE_W);
> +
> +     }
> +     if (set)
> +             __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +                             msr, MSR_TYPE_W);
> +     else
> +             __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> +                             msr, MSR_TYPE_W);
>  }
>  
>  /*
> @@ -3848,6 +3948,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx 
> *vmx)
>               exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>       if (!enable_apicv_reg)
>               exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> +     exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>       return exec_control;
>  }
>  
> @@ -6103,6 +6204,52 @@ static void update_cr8_intercept(struct kvm_vcpu 
> *vcpu, int tpr, int irr)
>       vmcs_write32(TPR_THRESHOLD, irr);
>  }
>  
> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
> +{
> +     u32 exec_control, sec_exec_control;
> +     int msr;
> +     struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +     /* There is no point in enabling virtualized x2apic mode without
> +      * enabling apicv */
> +     if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
> +             return;
> +
> +     if (set) {
> +             exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +             /* virtualize x2apic mode relies on tpr shadow */
> +             if (!(exec_control & CPU_BASED_TPR_SHADOW))
> +                     return;
> +     }
> +
> +     sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +
> +     if (set) {
> +             sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +             sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +     } else {
> +             sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +             if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
> +                     sec_exec_control |=
> +                                     SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +     }
> +     vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
> +
> +     for (msr = 0x800; msr <= 0x8ff; msr++)
> +             vmx_intercept_for_msr_read(msr, false, !set);
> +
> +     if (set) {
> +             /* According to the SDM, in x2apic mode the whole ID register is
> +              * used, but KVM only uses the highest eight bits, so reads of
> +              * it need to be intercepted */
> +             vmx_intercept_for_msr_read(0x802, false, true);
> +             /* TMCCT */
> +             vmx_intercept_for_msr_read(0x839, false, true);
> +     }
> +     /* TPR */
> +     vmx_intercept_for_msr_write(0x808, false, !set);
> +}
> +
>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
>  {
>       u32 exit_intr_info;
> @@ -7366,6 +7513,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>       .enable_nmi_window = enable_nmi_window,
>       .enable_irq_window = enable_irq_window,
>       .update_cr8_intercept = update_cr8_intercept,
> +     .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
>  
>       .set_tss_addr = vmx_set_tss_addr,
>       .get_tdp_level = get_ept_level,
> -- 
> 1.7.1

--
                        Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to