On 06/14/2012 05:04 AM, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
> 
> Process-context identifiers (PCIDs) are a facility by which a logical 
> processor
> may cache information for multiple linear-address spaces so that the processor
> may retain cached information when software switches to a different linear
> address space. Refer to section 4.10.1 in IA32 Intel Software Developer's 
> Manual
> Volume 3A for details.
> 
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as 
> running
> natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
> 
> 
>  #endif
>       unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> +     unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;

Unneeded, just put F(PCID) below.

> +     unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
>  
>       /* cpuid 1.edx */
>       const u32 kvm_supported_word0_x86_features =
> @@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, 
> u32 function,
>               0 /* DS-CPL, VMX, SMX, EST */ |
>               0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
>               F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> -             0 /* Reserved, DCA */ | F(XMM4_1) |
> +             f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |


> @@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
>       return cpu_has_vmx_rdtscp();
>  }
>  
> +static bool vmx_invpcid_supported(void)
> +{
> +     return cpu_has_vmx_invpcid();

Should that have && ept_enabled?  You turn off invpcid below if
!ept_enabled.

> +}
> +
>  /*
>   * Swap MSR entry in host/guest MSR entry array.
>   */
> @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct vmcs_config 
> *vmcs_conf)
>                       SECONDARY_EXEC_ENABLE_EPT |
>                       SECONDARY_EXEC_UNRESTRICTED_GUEST |
>                       SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> -                     SECONDARY_EXEC_RDTSCP;
> +                     SECONDARY_EXEC_RDTSCP |
> +                     SECONDARY_EXEC_ENABLE_INVPCID;
>               if (adjust_vmx_controls(min2, opt2,
>                                       MSR_IA32_VMX_PROCBASED_CTLS2,
>                                       &_cpu_based_2nd_exec_control) < 0)
> @@ -3731,6 +3743,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx 
> *vmx)
>       if (!enable_ept) {
>               exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
>               enable_unrestricted_guest = 0;
> +             /* Enable INVPCID for non-ept guests may cause performance 
> regression. */
> +             exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
>       }
>       if (!enable_unrestricted_guest)
>               exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;

This code is unneeded..

> @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>                       }
>               }
>       }
> +
> +     if (vmx_invpcid_supported()) {
> +             exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +             /* Exposing INVPCID only when PCID is exposed */
> +             best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> +             if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) && 
> guest_cpuid_has_pcid(vcpu)) {
> +                     exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> +                     vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> +                                  exec_control);
> +             } else {
> +                     exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +                     vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> +                                  exec_control);
> +                     if (best)
> +                             best->ecx &= ~bit(X86_FEATURE_INVPCID);
> +             }
> +     }
>  }

Since you override it here anyway.  But maybe it's needed if the host
never calls KVM_SET_CPUID.

>  
>  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
> @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
> struct vmcs12 *vmcs12)
>                                 page_to_phys(vmx->nested.apic_access_page));
>               }
>  
> +             /* Explicitly disable INVPCID until PCID for L2 guest is 
> supported */
> +             exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +

We can't just disable it without the guest knowing.  If we don't expose
INCPCID through the MSR, then we should fail VMKLAUNCH or VMRESUME is
this bit is set.

>               vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
>       }
>  
> @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>                       return 1;
>       }
>  
> +     if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> +         kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> +             return 1;

Why check old_cr0?

>  
> +     if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> +             if (!guest_cpuid_has_pcid(vcpu))
> +                     return 1;
> +
> +             /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> +             if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || 
> !is_long_mode(vcpu))
> +                     return 1;
> +     }
> +
>       if (kvm_x86_ops->set_cr4(vcpu, cr4))
>               return 1;
>  
> -     if ((cr4 ^ old_cr4) & pdptr_bits)
> +     if (((cr4 ^ old_cr4) & pdptr_bits) ||
> +         (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
>               kvm_mmu_reset_context(vcpu);


You can do


  if ((cr4 ^ old_cr4) & (pdptr_bits | X86_CR4_PCIDE))
     ...

> @@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
>       }
>  
>       if (is_long_mode(vcpu)) {
> -             if (cr3 & CR3_L_MODE_RESERVED_BITS)
> -                     return 1;
> +             if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> +                     if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> +                             return 1;
> +             } else
> +                     if (cr3 & CR3_L_MODE_RESERVED_BITS)
> +                             return 1;
>       } else {
>               if (is_pae(vcpu)) {
>                       if (cr3 & CR3_PAE_RESERVED_BITS)

I'm worried about the order of writes in
kvm_arch_vcpu_ioctl_set_sregs().  We might end up in a situation where
we we can't load all registers due to all those checks.


-- 
error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to