On 05/20/2010 12:16 PM, Sheng Yang wrote:
From: Dexuan Cui <[email protected]>

Enable XSAVE/XRSTORE for guest.

Change from V2:
Addressed comments from Avi.

Change from V1:

1. Use FPU API.
2. Fix CPUID issue.
3. Save/restore all possible guest xstate fields when switching. Because we
don't know which fields guest has already touched.


diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d08bb4a..3938bd1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
        } update_pte;

        struct fpu guest_fpu;
+       u64 xcr0;

        gva_t mmio_fault_cr2;
        struct kvm_pio_request pio;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9e6779f..346ea66 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -266,6 +266,7 @@ enum vmcs_field {
  #define EXIT_REASON_EPT_VIOLATION       48
  #define EXIT_REASON_EPT_MISCONFIG       49
  #define EXIT_REASON_WBINVD            54
+#define EXIT_REASON_XSETBV             55

  /*
   * Interruption-information format
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 99ae513..a63f206 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,8 @@
  #include <asm/vmx.h>
  #include <asm/virtext.h>
  #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>

  #include "trace.h"

@@ -247,6 +249,9 @@ static const u32 vmx_msr_index[] = {
  };
  #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)

+#define MERGE_TO_U64(low, high) \
+               (((low) & -1u) | ((u64)((high) & -1u) << 32))
+

There is already a static inline u64 kvm_read_edx_eax(vcpu) helper in cache_regs.h — use it instead of the open-coded MERGE_TO_U64 macro.


+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+       u64 new_bv = MERGE_TO_U64(kvm_register_read(vcpu, VCPU_REGS_RAX),
+               kvm_register_read(vcpu, VCPU_REGS_RDX));
+
+       if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
+               goto err;
+       if (vmx_get_cpl(vcpu) != 0)
+               goto err;
+       if (!(new_bv & XSTATE_FP))
+               goto err;
+       if ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE))
+               goto err;

What about a check against unknown bits?

+       vcpu->arch.xcr0 = new_bv;
+       xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+       skip_emulated_instruction(vcpu);
+       return 1;
+err:
+       kvm_inject_gp(vcpu, 0);
+       return 1;
+}
+
  static int handle_apic_access(struct kvm_vcpu *vcpu)
  {
        return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;


+static u64 host_xcr0;

__read_mostly.

+
+static void update_cpuid(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 1, 0);
+       if (!best)
+               return;
+
+       /* Update OSXSAVE bit */
+       if (cpu_has_xsave && best->function == 0x1) {
+               best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+               if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+                       best->ecx |= bit(X86_FEATURE_OSXSAVE);
+       }
+}

Note: need to update after userspace writes cpuid as well.

+
  int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -481,6 +513,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if (cr4 & CR4_RESERVED_BITS)
                return 1;

+       if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+               return 1;
+
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if ((cr4 ^ old_cr4) & pdptr_bits)
                kvm_mmu_reset_context(vcpu);

+       if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+               update_cpuid(vcpu);
+

I think we need to reload the guest's xcr0 at this point. Alternatively, call vmx_load_host_state() to ensure that the next entry will reload it.

@@ -1931,7 +1964,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,

        switch (function) {
        case 0:
-               entry->eax = min(entry->eax, (u32)0xb);
+               entry->eax = min(entry->eax, (u32)0xd);

Do we need any special handling for leaf 0xc?

@@ -4567,6 +4616,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
+       if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);

Better done in vmx_save_host_state(), so we only do it on context switches or entries from userspace.

kvm_read_cr4_bits() is faster - doesn't need a vmcs_readl().


        atomic_set(&vcpu->guest_mode, 1);
        smp_wmb();
@@ -5118,6 +5169,10 @@ void fx_init(struct kvm_vcpu *vcpu)
        fpu_alloc(&vcpu->arch.guest_fpu);
        fpu_finit(&vcpu->arch.guest_fpu);

+       /* Ensure guest xcr0 is valid for loading */
+       if (cpu_has_xsave)
+               vcpu->arch.xcr0 = XSTATE_FP;
+

Can do it unconditionally, not that it matters much.

  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
  {
        if (vcpu->guest_fpu_loaded)
@@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)

        vcpu->guest_fpu_loaded = 1;
        unlazy_fpu(current);
+       /* Restore all possible states in the guest */
+       if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+               xsetbv(XCR_XFEATURE_ENABLED_MASK,
+                       cpuid_get_possible_xcr0(vcpu));

Best to calculate it out of the fast path, when guest cpuid is set. Need to check it at this time as well.

Also can avoid it if guest xcr0 == host xcr0.

        fpu_restore_checking(&vcpu->arch.guest_fpu);
        trace_kvm_fpu(1);
  }
@@ -5144,7 +5211,14 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
                return;

        vcpu->guest_fpu_loaded = 0;
+       /* Save all possible states in the guest */
+       if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+               xsetbv(XCR_XFEATURE_ENABLED_MASK,
+                       cpuid_get_possible_xcr0(vcpu));

Ditto.

        fpu_save_init(&vcpu->arch.guest_fpu);
+       if (cpu_has_xsave)
+               xsetbv(XCR_XFEATURE_ENABLED_MASK,
+                       host_xcr0);
        ++vcpu->stat.fpu_reload;
        set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
        trace_kvm_fpu(0);


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to