If max leaf is greater than 0xd but xsave not available to the guest, then the current XSAVE size should not be filled in. This is a latent bug for now as the guest max leaf is 0xd, but will become problematic in the future.
The comment concerning XSS state is wrong. VT-x doesn't manage host/guest state automatically, but there is provision for "host only" bits to be set, so the implications are still accurate. Introduce {xstate,hw}_compressed_size() helpers to mirror the uncompressed ones. This in turn higlights a bug in xstate_init(). Defaulting this_cpu(xss) to ~0 requires a forced write to clear it back out. This in turn highlights that it's only a safe default on systems with XSAVES. Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com> --- CC: Jan Beulich <jbeul...@suse.com> CC: Roger Pau Monné <roger....@citrix.com> The more I think about it, the more I think that cross-checking with hardware is a bad move. It's horribly expensive, and for supervisor states in particular, liable to interfere with functionality. v2: * Cope with blind reads of CPUID.0xD[1] prior to %xcr0 having been set up. --- xen/arch/x86/cpuid.c | 24 ++++-------- xen/arch/x86/include/asm/xstate.h | 1 + xen/arch/x86/xstate.c | 64 ++++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 17 deletions(-) diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c index 7a38e032146a..a822e80c7ea7 100644 --- a/xen/arch/x86/cpuid.c +++ b/xen/arch/x86/cpuid.c @@ -330,23 +330,15 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, case XSTATE_CPUID: switch ( subleaf ) { - case 1: - if ( !p->xstate.xsavec && !p->xstate.xsaves ) - break; - - /* - * TODO: Figure out what to do for XSS state. VT-x manages host - * vs guest MSR_XSS automatically, so as soon as we start - * supporting any XSS states, the wrong XSS will be in context. - */ - BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); - fallthrough; case 0: - /* - * Read CPUID[0xD,0/1].EBX from hardware. They vary with enabled - * XSTATE, and appropriate XCR0|XSS are in context. - */ - res->b = cpuid_count_ebx(leaf, subleaf); + if ( p->basic.xsave ) + res->b = xstate_uncompressed_size(v->arch.xcr0); + break; + + case 1: + if ( p->xstate.xsavec ) + res->b = xstate_compressed_size(v->arch.xcr0 | + v->arch.msrs->xss.raw); break; } break; diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h index bfb66dd766b6..da1d89d2f416 100644 --- a/xen/arch/x86/include/asm/xstate.h +++ b/xen/arch/x86/include/asm/xstate.h @@ -109,6 +109,7 @@ void xstate_free_save_area(struct vcpu *v); int xstate_alloc_save_area(struct vcpu *v); void xstate_init(struct cpuinfo_x86 *c); unsigned int xstate_uncompressed_size(uint64_t xcr0); +unsigned int xstate_compressed_size(uint64_t xstates); static inline uint64_t xgetbv(unsigned int index) { diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c index a94f4025fce5..b2cf3ae6acee 100644 --- a/xen/arch/x86/xstate.c +++ b/xen/arch/x86/xstate.c @@ -614,6 +614,65 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0) return size; } +static unsigned int hw_compressed_size(uint64_t xstates) +{ + uint64_t curr_xcr0 = get_xcr0(), curr_xss = get_msr_xss(); + unsigned int size; + bool ok; + + ok = set_xcr0(xstates & ~XSTATE_XSAVES_ONLY); + ASSERT(ok); + set_msr_xss(xstates & XSTATE_XSAVES_ONLY); + + size = cpuid_count_ebx(XSTATE_CPUID, 1); + + ok = set_xcr0(curr_xcr0); + ASSERT(ok); + set_msr_xss(curr_xss); + + return size; +} + +unsigned int xstate_compressed_size(uint64_t xstates) +{ + unsigned int i, size = XSTATE_AREA_MIN_SIZE; + + if ( xstates == 0 ) /* TODO: clean up paths passing 0 in here. */ + return 0; + + if ( xstates <= (X86_XCR0_SSE | X86_XCR0_FP) ) + return size; + + /* + * For the compressed size, every component matters. Some are + * automatically rounded up to 64 first. + */ + xstates &= ~XSTATE_FP_SSE; + for_each_set_bit ( i, &xstates, 63 ) + { + if ( test_bit(i, &xstate_align) ) + size = ROUNDUP(size, 64); + + size += xstate_sizes[i]; + } + + /* In debug builds, cross-check our calculation with hardware. */ + if ( IS_ENABLED(CONFIG_DEBUG) ) + { + unsigned int hwsize; + + xstates |= XSTATE_FP_SSE; + hwsize = hw_compressed_size(xstates); + + if ( size != hwsize ) + printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n", + __func__, xstates, size, hwsize); + size = hwsize; + } + + return size; +} + static bool valid_xcr0(uint64_t xcr0) { /* FP must be unconditionally set. */ @@ -681,7 +740,8 @@ void xstate_init(struct cpuinfo_x86 *c) * write it. */ this_cpu(xcr0) = 0; - this_cpu(xss) = ~0; + if ( cpu_has_xsaves ) + this_cpu(xss) = ~0; cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK; @@ -694,6 +754,8 @@ void xstate_init(struct cpuinfo_x86 *c) set_in_cr4(X86_CR4_OSXSAVE); if ( !set_xcr0(feature_mask) ) BUG(); + if ( cpu_has_xsaves ) + set_msr_xss(0); if ( bsp ) { -- 2.30.2