Gleb Natapov wrote on 2013-01-24:
> On Wed, Jan 23, 2013 at 10:47:25PM +0800, Yang Zhang wrote:
>> From: Yang Zhang <[email protected]>
>> 
>> basically to benefit from apicv, we need to enable virtualized x2apic mode.
>> Currently, we only enable it when guest is really using x2apic.
>> 
>> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled
>> x2apic:
>> 0x800 - 0x8ff: no read intercept for apicv register virtualization,
>>                except APIC ID and TMCCT which need software's
>>                assistance to get right value.
>> Signed-off-by: Kevin Tian <[email protected]>
>> Signed-off-by: Yang Zhang <[email protected]>
>> ---
>>  arch/x86/include/asm/kvm_host.h |    1 + arch/x86/include/asm/vmx.h   
>>    |    1 + arch/x86/kvm/lapic.c            |   14 ++-
>>  arch/x86/kvm/svm.c              |    6 + arch/x86/kvm/vmx.c           
>>    |  203 +++++++++++++++++++++++++++++++++++---- 5 files changed, 201
>>  insertions(+), 24 deletions(-)
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h index f75e1fe..e1306c1 100644 ---
>> a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -692,6 +692,7 @@ struct kvm_x86_ops {
>>      void (*enable_nmi_window)(struct kvm_vcpu *vcpu);       void
>>  (*enable_irq_window)(struct kvm_vcpu *vcpu);        void
>>  (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
>>  +   void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>>      int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);        int
>>  (*get_tdp_level)(void);     u64 (*get_mt_mask)(struct kvm_vcpu *vcpu,
>>  gfn_t gfn, bool is_mmio);
>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>> index 44c3f7e..0a54df0 100644
>> --- a/arch/x86/include/asm/vmx.h
>> +++ b/arch/x86/include/asm/vmx.h
>> @@ -139,6 +139,7 @@
>>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define
>>  SECONDARY_EXEC_ENABLE_EPT               0x00000002 #define
>>  SECONDARY_EXEC_RDTSCP                       0x00000008 +#define
>>  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010 #define
>>  SECONDARY_EXEC_ENABLE_VPID              0x00000020 #define
>>  SECONDARY_EXEC_WBINVD_EXITING               0x00000040 #define
>>  SECONDARY_EXEC_UNRESTRICTED_GUEST   0x00000080
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> index 0664c13..83a9547 100644
>> --- a/arch/x86/kvm/lapic.c
>> +++ b/arch/x86/kvm/lapic.c
>> @@ -1303,6 +1303,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
>> 
>>  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) { +       u64
>>  old_value = vcpu->arch.apic_base;   struct kvm_lapic *apic =
>>  vcpu->arch.apic;
>>  
>>      if (!apic) {
>> @@ -1324,11 +1325,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu,
> u64 value)
>>              value &= ~MSR_IA32_APICBASE_BSP;
>>  
>>      vcpu->arch.apic_base = value;
>> -    if (apic_x2apic_mode(apic)) {
>> -            u32 id = kvm_apic_id(apic);
>> -            u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>> -            kvm_apic_set_ldr(apic, ldr);
>> +    if ((old_value ^ value) & X2APIC_ENABLE) {
>> +            if (value & X2APIC_ENABLE) {
>> +                    u32 id = kvm_apic_id(apic);
>> +                    u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>> +                    kvm_apic_set_ldr(apic, ldr);
>> +                    kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
>> +            } else
>> +                    kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
>>      } +     apic->base_address = apic->vcpu->arch.apic_base &               
>>             
>>  MSR_IA32_APICBASE_BASE;
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index d29d3cd..38407e9 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu
> *vcpu, int tpr, int irr)
>>              set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>>  }
>> +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>> +{
>> +    return;
>> +}
>> +
>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) {         struct vcpu_svm
>>  *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops
>>  svm_x86_ops = {     .enable_nmi_window = enable_nmi_window,
>>      .enable_irq_window = enable_irq_window,         .update_cr8_intercept =
>>  update_cr8_intercept,
>> +    .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
>> 
>>      .set_tss_addr = svm_set_tss_addr,
>>      .get_tdp_level = get_npt_level,
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 8a8116a..c2bc989 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -643,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a;
>>  static unsigned long *vmx_io_bitmap_b;
>>  static unsigned long *vmx_msr_bitmap_legacy;
>>  static unsigned long *vmx_msr_bitmap_longmode;
>> +static unsigned long *vmx_msr_bitmap_legacy_x2apic;
>> +static unsigned long *vmx_msr_bitmap_longmode_x2apic;
>> 
>>  static bool cpu_has_load_ia32_efer;
>>  static bool cpu_has_load_perf_global_ctrl;
>> @@ -767,6 +769,12 @@ static inline bool
> cpu_has_vmx_virtualize_apic_accesses(void)
>>              SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>  }
>> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
>> +{
>> +    return vmcs_config.cpu_based_2nd_exec_ctrl &
>> +            SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>> +}
>> +
>>  static inline bool cpu_has_vmx_apic_register_virt(void)
>>  {
>>      return vmcs_config.cpu_based_2nd_exec_ctrl &
>> @@ -1830,6 +1838,24 @@ static void move_msr_up(struct vcpu_vmx *vmx, int
> from, int to)
>>      vmx->guest_msrs[from] = tmp;
>>  }
>> +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>> +{
>> +    unsigned long *msr_bitmap;
>> +
>> +    if (vcpu->arch.apic_base & X2APIC_ENABLE)
> if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->apic))
Sure.
 
>> +            if (is_long_mode(vcpu))
>> +                    msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
>> +            else
>> +                    msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
>> +    else
>> +            if (is_long_mode(vcpu))
>> +                    msr_bitmap = vmx_msr_bitmap_longmode;
>> +            else
>> +                    msr_bitmap = vmx_msr_bitmap_legacy;
>> +
>> +    vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
>> +}
>> +
>>  /*
>>   * Set up the vmcs to automatically save and restore system
>>   * msrs.  Don't touch the 64-bit msrs if the guest is in legacy
>> @@ -1838,7 +1864,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int
> from, int to)
>>  static void setup_msrs(struct vcpu_vmx *vmx)
>>  {
>>      int save_nmsrs, index;
>> -    unsigned long *msr_bitmap;
>> 
>>      save_nmsrs = 0;
>>  #ifdef CONFIG_X86_64
>> @@ -1870,14 +1895,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
>> 
>>      vmx->save_nmsrs = save_nmsrs;
>> -    if (cpu_has_vmx_msr_bitmap()) {
>> -            if (is_long_mode(&vmx->vcpu))
>> -                    msr_bitmap = vmx_msr_bitmap_longmode;
>> -            else
>> -                    msr_bitmap = vmx_msr_bitmap_legacy;
>> -
>> -            vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
>> -    }
>> +    if (cpu_has_vmx_msr_bitmap())
>> +            vmx_set_msr_bitmap(&vmx->vcpu);
>>  }
>>  
>>  /*
>> @@ -2543,6 +2562,7 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
>>      if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
>>  {           min2 = 0;               opt2 = 
>> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>>  +                   SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>>                      SECONDARY_EXEC_WBINVD_EXITING |                         
>> SECONDARY_EXEC_ENABLE_VPID |
>>                      SECONDARY_EXEC_ENABLE_EPT |
>> @@ -2564,7 +2584,8 @@ static __init int setup_vmcs_config(struct
>> vmcs_config *vmcs_conf)
>> 
>>      if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
>>              _cpu_based_2nd_exec_control &= ~(
>> -                            SECONDARY_EXEC_APIC_REGISTER_VIRT);
>> +                            SECONDARY_EXEC_APIC_REGISTER_VIRT |
>> +                            SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
>> 
>>      if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {          
>> /*
>>  CR3 accesses and invlpg don't need to cause VM Exits when EPT @@
>>  -3724,7 +3745,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
>>      spin_unlock(&vmx_vpid_lock); }
>> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
>> u32 msr) +#define MSR_TYPE_R 1 +#define MSR_TYPE_W   2 +static void
>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +                 
>>                         u32
>> msr, int type)
>>  {
>>      int f = sizeof(unsigned long);
>> @@ -3737,20 +3761,93 @@ static void
> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>       * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>>       */
>>      if (msr <= 0x1fff) {
>> -            __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>> -            __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>> +            if (type & MSR_TYPE_R)
>> +                    /* read-low */
>> +                    __clear_bit(msr, msr_bitmap + 0x000 / f);
>> +
>> +            if (type & MSR_TYPE_W)
>> +                    /* write-low */
>> +                    __clear_bit(msr, msr_bitmap + 0x800 / f);
>> +
>>      } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>              msr &= 0x1fff;
>> -            __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>> -            __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>> +            if (type & MSR_TYPE_R)
>> +                    /* read-high */
>> +                    __clear_bit(msr, msr_bitmap + 0x400 / f);
>> +
>> +            if (type & MSR_TYPE_W)
>> +                    /* write-high */
>> +                    __clear_bit(msr, msr_bitmap + 0xc00 / f);
>> +
>> +    }
>> +}
>> +
>> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
>> +                                            u32 msr, int type)
>> +{
>> +    int f = sizeof(unsigned long);
>> +
>> +    if (!cpu_has_vmx_msr_bitmap())
>> +            return;
>> +
>> +    /*
>> +     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
>> +     * have the write-low and read-high bitmap offsets the wrong way round.
>> +     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
>> +     */
>> +    if (msr <= 0x1fff) {
>> +            if (type & MSR_TYPE_R)
>> +                    /* read-low */
>> +                    __set_bit(msr, msr_bitmap + 0x000 / f);
>> +
>> +            if (type & MSR_TYPE_W)
>> +                    /* write-low */
>> +                    __set_bit(msr, msr_bitmap + 0x800 / f);
>> +
>> +    } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>> +            msr &= 0x1fff;
>> +            if (type & MSR_TYPE_R)
>> +                    /* read-high */
>> +                    __set_bit(msr, msr_bitmap + 0x400 / f);
>> +
>> +            if (type & MSR_TYPE_W)
>> +                    /* write-high */
>> +                    __set_bit(msr, msr_bitmap + 0xc00 / f);
>> +
>>      }
>>  }
>>  
>>  static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
>>  {
>>      if (!longmode_only)
>> -            __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>> -    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>> +            __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
>> +                                            msr, MSR_TYPE_R | MSR_TYPE_W);
>> +    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>> +                                            msr, MSR_TYPE_R | MSR_TYPE_W);
>> +}
>> +
>> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
>> +{
>> +    __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
>> +                    msr, MSR_TYPE_R);
>> +    __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
>> +                    msr, MSR_TYPE_R);
>> +}
>> +
>> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
>> +{
>> +    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
>> +                    msr, MSR_TYPE_R);
>> +    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
>> +                    msr, MSR_TYPE_R);
>> +}
>> +
>> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
>> +{
>> +    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
>> +                    msr, MSR_TYPE_W);
>> +    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
>> +                    msr, MSR_TYPE_W);
>>  }
>>  
>>  /*
>> @@ -3848,6 +3945,7 @@ static u32 vmx_secondary_exec_control(struct
> vcpu_vmx *vmx)
>>              exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;     if
>>  (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm))
>>              exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; +   
>> exec_control
>>  &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;  return exec_control; }
>> @@ -6103,6 +6201,40 @@ static void update_cr8_intercept(struct kvm_vcpu
> *vcpu, int tpr, int irr)
>>      vmcs_write32(TPR_THRESHOLD, irr);
>>  }
>> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>> +{
>> +    u32 exec_control, sec_exec_control;
>> +    struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> +    /* There is not point to enable virtualize x2apic without enable
>> +     * apicv
>> +     */
>> +    if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
>> +            return;
>> +
>> +    if (set) {
> Just add vm_need_tpr_shadow() to the if above.
Sure.
 
>> +            exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); +        
>>         /*
>> virtualize x2apic mode relies on tpr shadow */ +             if 
>> (!(exec_control &
>> CPU_BASED_TPR_SHADOW)) +                     return; +       } + +   
>> sec_exec_control =
>> vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + +  if (set) {
>> +            sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>> +            sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; +    
>> } else
>> { +          sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; +   
>>         if
>> (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
> How enable_apicv_reg can be true without virtualized apic access?
Right. This check is always true here. Will drop it.



Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to