If max leaf is greater than 0xd but xsave not available to the guest, then the
current XSAVE size should not be filled in.  This is a latent bug for now as
the guest max leaf is 0xd, but will become problematic in the future.

The comment concerning XSS state is wrong.  VT-x doesn't manage host/guest
state automatically, but there is provision for "host only" bits to be set, so
the implications are still accurate.

Introduce {xstate,hw}_compressed_size() helpers to mirror the uncompressed
ones.

This in turn higlights a bug in xstate_init().  Defaulting this_cpu(xss) to ~0
requires a forced write to clear it back out.  This in turn highlights that
it's only a safe default on systems with XSAVES.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: Jan Beulich <jbeul...@suse.com>
CC: Roger Pau Monné <roger....@citrix.com>

The more I think about it, the more I think that cross-checking with hardware
is a bad move.  It's horribly expensive, and for supervisor states in
particular, liable to interfere with functionality.

v2:
 * Cope with blind reads of CPUID.0xD[1] prior to %xcr0 having been set up.
---
 xen/arch/x86/cpuid.c              | 24 ++++--------
 xen/arch/x86/include/asm/xstate.h |  1 +
 xen/arch/x86/xstate.c             | 64 ++++++++++++++++++++++++++++++-
 3 files changed, 72 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 7a38e032146a..a822e80c7ea7 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -330,23 +330,15 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
     case XSTATE_CPUID:
         switch ( subleaf )
         {
-        case 1:
-            if ( !p->xstate.xsavec && !p->xstate.xsaves )
-                break;
-
-            /*
-             * TODO: Figure out what to do for XSS state.  VT-x manages host
-             * vs guest MSR_XSS automatically, so as soon as we start
-             * supporting any XSS states, the wrong XSS will be in context.
-             */
-            BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0);
-            fallthrough;
         case 0:
-            /*
-             * Read CPUID[0xD,0/1].EBX from hardware.  They vary with enabled
-             * XSTATE, and appropriate XCR0|XSS are in context.
-             */
-            res->b = cpuid_count_ebx(leaf, subleaf);
+            if ( p->basic.xsave )
+                res->b = xstate_uncompressed_size(v->arch.xcr0);
+            break;
+
+        case 1:
+            if ( p->xstate.xsavec )
+                res->b = xstate_compressed_size(v->arch.xcr0 |
+                                                v->arch.msrs->xss.raw);
             break;
         }
         break;
diff --git a/xen/arch/x86/include/asm/xstate.h 
b/xen/arch/x86/include/asm/xstate.h
index bfb66dd766b6..da1d89d2f416 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -109,6 +109,7 @@ void xstate_free_save_area(struct vcpu *v);
 int xstate_alloc_save_area(struct vcpu *v);
 void xstate_init(struct cpuinfo_x86 *c);
 unsigned int xstate_uncompressed_size(uint64_t xcr0);
+unsigned int xstate_compressed_size(uint64_t xstates);
 
 static inline uint64_t xgetbv(unsigned int index)
 {
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index a94f4025fce5..b2cf3ae6acee 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -614,6 +614,65 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0)
     return size;
 }
 
+static unsigned int hw_compressed_size(uint64_t xstates)
+{
+    uint64_t curr_xcr0 = get_xcr0(), curr_xss = get_msr_xss();
+    unsigned int size;
+    bool ok;
+
+    ok = set_xcr0(xstates & ~XSTATE_XSAVES_ONLY);
+    ASSERT(ok);
+    set_msr_xss(xstates & XSTATE_XSAVES_ONLY);
+
+    size = cpuid_count_ebx(XSTATE_CPUID, 1);
+
+    ok = set_xcr0(curr_xcr0);
+    ASSERT(ok);
+    set_msr_xss(curr_xss);
+
+    return size;
+}
+
+unsigned int xstate_compressed_size(uint64_t xstates)
+{
+    unsigned int i, size = XSTATE_AREA_MIN_SIZE;
+
+    if ( xstates == 0 ) /* TODO: clean up paths passing 0 in here. */
+        return 0;
+
+    if ( xstates <= (X86_XCR0_SSE | X86_XCR0_FP) )
+        return size;
+
+    /*
+     * For the compressed size, every component matters.  Some are
+     * automatically rounded up to 64 first.
+     */
+    xstates &= ~XSTATE_FP_SSE;
+    for_each_set_bit ( i, &xstates, 63 )
+    {
+        if ( test_bit(i, &xstate_align) )
+            size = ROUNDUP(size, 64);
+
+        size += xstate_sizes[i];
+    }
+
+    /* In debug builds, cross-check our calculation with hardware. */
+    if ( IS_ENABLED(CONFIG_DEBUG) )
+    {
+        unsigned int hwsize;
+
+        xstates |= XSTATE_FP_SSE;
+        hwsize = hw_compressed_size(xstates);
+
+        if ( size != hwsize )
+            printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n",
+                        __func__, xstates, size, hwsize);
+        size = hwsize;
+    }
+
+    return size;
+}
+
 static bool valid_xcr0(uint64_t xcr0)
 {
     /* FP must be unconditionally set. */
@@ -681,7 +740,8 @@ void xstate_init(struct cpuinfo_x86 *c)
      * write it.
      */
     this_cpu(xcr0) = 0;
-    this_cpu(xss) = ~0;
+    if ( cpu_has_xsaves )
+        this_cpu(xss) = ~0;
 
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
     feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK;
@@ -694,6 +754,8 @@ void xstate_init(struct cpuinfo_x86 *c)
     set_in_cr4(X86_CR4_OSXSAVE);
     if ( !set_xcr0(feature_mask) )
         BUG();
+    if ( cpu_has_xsaves )
+        set_msr_xss(0);
 
     if ( bsp )
     {
-- 
2.30.2


Reply via email to