Gleb Natapov wrote on 2013-01-14:
> On Mon, Jan 14, 2013 at 11:01:02AM +0000, Zhang, Yang Z wrote:
>> Gleb Natapov wrote on 2013-01-14:
>>> On Mon, Jan 14, 2013 at 03:13:34PM +0800, Yang Zhang wrote:
>>>> From: Yang Zhang <[email protected]>
>>>> 
>>>> Basically, to benefit from APICv, we need to enable virtualized x2apic mode.
>>>> Currently, we only enable it when the guest is really using x2apic.
>>>> 
>>>> Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled
>>> x2apic:
>>>>     0x800 - 0x8ff: no read intercept for apicv register virtualization,
>>>>                    except APIC ID and TMCCT.
>>>>     APIC ID and TMCCT: need software's assistance to get right value.
>>> Actually since msr bitmap is shared between all vcpus this will break
>>> guests that do not enable x2apic.
>> I don't think this case will exist. It will break the real OS too.
>> 
> Which case? One VM uses x2apic another one does not? Bitmap is shared
> between all vcpus of all VMs.
Sorry. I misread your comments.

Yes, it is really a problem. Maybe we need to use a per-VM MSR bitmap instead
of the global bitmap.
 
>>>> Signed-off-by: Kevin Tian <[email protected]>
>>>> Signed-off-by: Yang Zhang <[email protected]>
>>>> ---
>>>>  arch/x86/include/asm/kvm_host.h |    1 + arch/x86/include/asm/vmx.h
>>>>    |    1 + arch/x86/kvm/lapic.c            |   15 +++-
>>>>  arch/x86/kvm/svm.c              |    6 ++ arch/x86/kvm/vmx.c
>>>>     |  162 +++++++++++++++++++++++++++++++++++++-- 5 files
> changed, 173
>>>>  insertions(+), 12 deletions(-)
>>>> diff --git a/arch/x86/include/asm/kvm_host.h
>>>> b/arch/x86/include/asm/kvm_host.h index c431b33..35aa8e6 100644 ---
>>>> a/arch/x86/include/asm/kvm_host.h +++
>>>> b/arch/x86/include/asm/kvm_host.h @@ -697,6 +697,7 @@ struct
>>>> kvm_x86_ops {
>>>>    void (*enable_nmi_window)(struct kvm_vcpu *vcpu);       void
>>>>  (*enable_irq_window)(struct kvm_vcpu *vcpu);      void
>>>>  (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
>>>>  + void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
>>>>    int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);        int
>>>>  (*get_tdp_level)(void);   u64 (*get_mt_mask)(struct kvm_vcpu *vcpu,
>>>>  gfn_t gfn, bool is_mmio);
>>>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>>>> index 44c3f7e..0a54df0 100644
>>>> --- a/arch/x86/include/asm/vmx.h
>>>> +++ b/arch/x86/include/asm/vmx.h
>>>> @@ -139,6 +139,7 @@
>>>>  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define
>>>>  SECONDARY_EXEC_ENABLE_EPT               0x00000002 #define
>>>>  SECONDARY_EXEC_RDTSCP                     0x00000008 +#define
>>>>  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE   0x00000010 #define
>>>>  SECONDARY_EXEC_ENABLE_VPID              0x00000020 #define
>>>>  SECONDARY_EXEC_WBINVD_EXITING             0x00000040 #define
>>>>  SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>> index 0664c13..2ef5e2b 100644
>>>> --- a/arch/x86/kvm/lapic.c
>>>> +++ b/arch/x86/kvm/lapic.c
>>>> @@ -1323,12 +1323,17 @@ void kvm_lapic_set_base(struct kvm_vcpu
> *vcpu,
>>> u64 value)
>>>>    if (!kvm_vcpu_is_bsp(apic->vcpu))
>>>>            value &= ~MSR_IA32_APICBASE_BSP;
>>>> -  vcpu->arch.apic_base = value;
>>>> -  if (apic_x2apic_mode(apic)) {
>>>> -          u32 id = kvm_apic_id(apic);
>>>> -          u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>>>> -          kvm_apic_set_ldr(apic, ldr);
>>>> +  if ((vcpu->arch.apic_base ^ value) & X2APIC_ENABLE) {
>>>> +          if (value & X2APIC_ENABLE) {
>>>> +                  u32 id = kvm_apic_id(apic);
>>>> +                  u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
>>>> +                  kvm_apic_set_ldr(apic, ldr);
>>>> +                  kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
>>>> +          } else
>>>> +                  kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
>>>>    }
>>>> +
>>>> +  vcpu->arch.apic_base = value;
>>>>    apic->base_address = apic->vcpu->arch.apic_base &
>>>>                         MSR_IA32_APICBASE_BASE;
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index d29d3cd..38407e9 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct
> kvm_vcpu
>>> *vcpu, int tpr, int irr)
>>>>            set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
>>>>  }
>>>> +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool
>>>> set) +{ +  return; +} +
>>>>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu) {       struct vcpu_svm
>>>>  *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops
>>>>  svm_x86_ops = {   .enable_nmi_window = enable_nmi_window,
>>>>    .enable_irq_window = enable_irq_window,         .update_cr8_intercept =
>>>>  update_cr8_intercept,
>>>> +  .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
>>>> 
>>>>    .set_tss_addr = svm_set_tss_addr,
>>>>    .get_tdp_level = get_npt_level,
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index 0403634..847022e 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -767,6 +767,12 @@ static inline bool
>>> cpu_has_vmx_virtualize_apic_accesses(void)
>>>>            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>>>  }
>>>> +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
>>>> +{
>>>> +  return vmcs_config.cpu_based_2nd_exec_ctrl &
>>>> +          SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>>>> +}
>>>> +
>>>>  static inline bool cpu_has_vmx_apic_register_virt(void)
>>>>  {
>>>>    return vmcs_config.cpu_based_2nd_exec_ctrl &
>>>> @@ -2543,6 +2549,7 @@ static __init int setup_vmcs_config(struct
>>> vmcs_config *vmcs_conf)
>>>>    if (_cpu_based_exec_control &
>>>>  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {          min2 = 0;               
>>>> opt2 =
>>>>  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>>>>  +                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
>>>>                    SECONDARY_EXEC_WBINVD_EXITING |         
>>>> SECONDARY_EXEC_ENABLE_VPID |
>>>>                    SECONDARY_EXEC_ENABLE_EPT | @@ -3724,7 +3731,45 @@ 
>>>> static void
>>>>  free_vpid(struct vcpu_vmx *vmx)   spin_unlock(&vmx_vpid_lock); }
>>>> -static void __vmx_disable_intercept_for_msr(unsigned long
>>>> *msr_bitmap, u32 msr) +#define MSR_TYPE_R  1 +#define MSR_TYPE_W   2
>>>> +static void __vmx_disable_intercept_for_msr(unsigned long
>>>> *msr_bitmap, +                                     u32 msr, int type) +{ + 
>>>> int f = sizeof(unsigned
>>>> long); + + if (!cpu_has_vmx_msr_bitmap()) +                return; + +     
>>>> /* +     * See
>>>> Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals +      *
>>>> have the write-low and read-high bitmap offsets the wrong way round.
>>>> +   * We can control MSRs 0x00000000-0x00001fff and
>>>> 0xc0000000-0xc0001fff. +    */ +   if (msr <= 0x1fff) { +          if 
>>>> (type &
>>>> MSR_TYPE_R) +                      /* read-low */ +                        
>>>> __clear_bit(msr, msr_bitmap +
>>>> 0x000 / f); + +            if (type & MSR_TYPE_W) +                        
>>>> /* write-low */ +
>>>>    __clear_bit(msr, msr_bitmap + 0x800 / f); + +   } else if ((msr >=
>>>> 0xc0000000) && (msr <= 0xc0001fff)) { +            msr &= 0x1fff; +        
>>>>         if (type &
>>>> MSR_TYPE_R) +      /* read-high */ +                       
>>>> __clear_bit(msr, msr_bitmap +
>>>> 0x400 / f); + +            if (type & MSR_TYPE_W) +                        
>>>> /* write-high */ +
>>>>    __clear_bit(msr, msr_bitmap + 0xc00 / f); + +   } +} + +static void
>>>> __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +                
>>>>                         u32
>>>> msr, int type)
>>>>  {
>>>>    int f = sizeof(unsigned long);
>>>> @@ -3737,20 +3782,75 @@ static void
>>> __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
>>>>     * We can control MSRs 0x00000000-0x00001fff and
>>>>  0xc0000000-0xc0001fff.     */     if (msr <= 0x1fff) {
>>>> -          __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
>>>> -          __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
>>>> +          if (type & MSR_TYPE_R)
>>>> +                  /* read-low */
>>>> +                  __set_bit(msr, msr_bitmap + 0x000 / f);
>>>> +
>>>> +          if (type & MSR_TYPE_W)
>>>> +                  /* write-low */
>>>> +                  __set_bit(msr, msr_bitmap + 0x800 / f);
>>>> +
>>>>    } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
>>>>            msr &= 0x1fff;
>>>> -          __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
>>>> -          __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
>>>> +          if (type & MSR_TYPE_R)
>>>> +                  /* read-high */
>>>> +                  __set_bit(msr, msr_bitmap + 0x400 / f);
>>>> +
>>>> +          if (type & MSR_TYPE_W)
>>>> +                  /* write-high */
>>>> +                  __set_bit(msr, msr_bitmap + 0xc00 / f);
>>>> +
>>>>    } } + static void vmx_disable_intercept_for_msr(u32 msr, bool
>>>>  longmode_only) {  if (!longmode_only)
>>>> -          __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
>>>> -  __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
>>>> +          __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +        
>>>>                                         msr,
>>>> MSR_TYPE_R | MSR_TYPE_W);
>>>> +  __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
>>>> +                                          msr, MSR_TYPE_R | MSR_TYPE_W); 
>>>> +} + +static void
>>>> vmx_intercept_for_msr_read(u32 msr, bool longmode_only, +                  
>>>>                 bool
>>>> set) +{ +  if (!longmode_only) { +         if (set) +
>>>>    __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy, +                 
>>>>                 msr,
>>>> MSR_TYPE_R); +             else +
>>>>    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +                
>>>>                         msr,
>>>> MSR_TYPE_R); + +   } +     if (set)
>>>> +          __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, +       
>>>>                         msr,
>>>> MSR_TYPE_R); +     else
>>>> +          __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +      
>>>>                         msr,
>>>> MSR_TYPE_R); +} + +static void vmx_intercept_for_msr_write(u32 msr,
>>>> bool longmode_only, +                                      bool set) +{ +  
>>>> if (!longmode_only) { +         if
>>>> (set) +    __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy,
>>>> +                                  msr, MSR_TYPE_W); +             else +
>>>>    __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +                
>>>>                         msr,
>>>> MSR_TYPE_W); + +   } +     if (set)
>>>> +          __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode, +       
>>>>                         msr,
>>>> MSR_TYPE_W); +     else
>>>> +          __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +      
>>>>                         msr,
>>>> MSR_TYPE_W);
>>>>  }
>>>>  
>>>>  /*
>>>> @@ -3848,6 +3948,7 @@ static u32 vmx_secondary_exec_control(struct
>>> vcpu_vmx *vmx)
>>>>            exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;     if
>>>>  (!enable_apicv_reg)               exec_control &=
>>>>  ~SECONDARY_EXEC_APIC_REGISTER_VIRT; +     exec_control &=
>>>>  ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;   return
> exec_control; }
>>>> @@ -6103,6 +6204,52 @@ static void update_cr8_intercept(struct
> kvm_vcpu
>>> *vcpu, int tpr, int irr)
>>>>    vmcs_write32(TPR_THRESHOLD, irr);
>>>>  }
>>>> +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool
>>>> set) +{ +  u32 exec_control, sec_exec_control; +   int msr; +      struct
>>>> vcpu_vmx *vmx = to_vmx(vcpu); + +  /* There is not point to enable
>>>> virtualize x2apic without enable +  * apicv*/ +    if
>>>> (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg) +
>>>>    return; + +     if (set) { +            exec_control =
>>>> vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); +          /* virtualize x2apic 
>>>> mode
>>>> relies on tpr shadow */ +          if (!(exec_control &
>>>> CPU_BASED_TPR_SHADOW)) +                   return; +       } + +   
>>>> sec_exec_control =
>>>> vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + +        if (set) {
>>>> +          sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>>> +          sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; +    
>>>> }
>>>> else { +           sec_exec_control &= 
>>>> ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>>>> +          if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) +
>>>>    sec_exec_control |= +                                   
>>>> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>>> +  } +     vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); +
>>>> +  for (msr = 0x800; msr <= 0x8ff; msr++) +
>>>>    vmx_intercept_for_msr_read(msr, false, !set); + +       if (set) { +    
>>>>         /*
>>>> According SDM, in x2apic mode, the whole id reg is used. +          * But 
>>>> in
>>>> KVM, it only use the highest eight bits. Need to +          * intercept 
>>>> it*/
>>>> +  vmx_intercept_for_msr_read(0x802, false, true); +               /* 
>>>> TMCCT */ +
>>>>    vmx_intercept_for_msr_read(0x839, false, true); +       } +     /* TPR 
>>>> */
>>>> +  vmx_intercept_for_msr_write(0x808, false, !set); +} +
>>>>  static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) {      u32
>>>>  exit_intr_info; @@ -7366,6 +7513,7 @@ static struct kvm_x86_ops
>>>>  vmx_x86_ops = {   .enable_nmi_window = enable_nmi_window,
>>>>    .enable_irq_window = enable_irq_window,         .update_cr8_intercept =
>>>>  update_cr8_intercept,
>>>> +  .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
>>>> 
>>>>    .set_tss_addr = vmx_set_tss_addr,
>>>>    .get_tdp_level = get_ept_level,
>>>> --
>>>> 1.7.1
>>> 
>>> --
>>>                     Gleb.
>> 
>> 
>> Best regards,
>> Yang
> 
> --
>                       Gleb.


Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to