On 01/06/16 14:28, Jan Beulich wrote: >>>> On 01.06.16 at 15:03, <andrew.coop...@citrix.com> wrote: >> On 01/06/16 13:01, Jan Beulich wrote: >>>>>> I want to adjust the representation of cpuid information in struct >>>>>> domain. The current loop in domain_cpuid() causes an O(N) overhead for >>>>>> every query, which is very poor for actions which really should be a >>>>>> single bit test at a fixed offset. >>>>>> >>>>>> This needs to be combined with properly splitting the per-domain and >>>>>> per-vcpu information, which requires knowing the expected vcpu topology >>>>>> during domain creation. >>>>>> >>>>>> On top of that, there needs to be verification logic to check the >>>>>> correctness of information passed from the toolstack. >>>>>> >>>>>> All of these areas are covered in the "known issues" section of the >>>>>> feature doc, and I do plan to fix them all. However, it isn't a couple >>>>>> of hours worth of work. >>>>> All understood, yet not to the point: The original remark was that >>>>> the very XSTATE handling could be done better with far not as much >>>>> of a change, at least afaict without having tried. >>>> In which case I don't know what you were suggesting. >>> Make {hvm,pv}_cpuid() invoke themselves recursively to >>> determine what bits to mask off from CPUID[0xd].EAX. >> So that would work. However, to do this, you need to query leaves 1, >> 0x80000001 and 7, all of which will hit the O(N) loop in domain_cpuid() >> >> Luckily, none of those specific paths further recurse into {hvm,pv}_cpuid(). >> >> I am unsure which to go with. My gut feel is that this would be quite a >> performance hit, but I have no evidence either way. OTOH, it will give >> the correct answer, rather than an approximation. > Not only since I believe performance is very close to irrelevant for > CPUID leaf 0xD invocations, I think I'd prefer correctness over > performance (as would be basically always the case). How about > you?
Right - this is the alternative, doing the calculation in {hvm,pv}_cpuid(), based on top of your cleanup from yesterday. There is a bugfix in the PV side (pv_featureset[FEATURESET_1c] should be taken into account even for control/hardware domain accesses), and a preemptive fix on the HVM side to avoid advertising any XSS states, as we don't support any yet. Thoughts? ~Andrew
>From be056a4b78929e428d50ca386ec56b42c9cde9ac Mon Sep 17 00:00:00 2001 From: Andrew Cooper <andrew.coop...@citrix.com> Date: Thu, 2 Jun 2016 12:08:42 +0100 Subject: [PATCH] xen/x86: Clip guests view of xfeature_mask --- xen/arch/x86/hvm/hvm.c | 46 +++++++++++++++++++++++++++++-- xen/arch/x86/traps.c | 65 +++++++++++++++++++++++++++++++++++++------- xen/include/asm-x86/xstate.h | 31 ++++++++++++++------- 3 files changed, 120 insertions(+), 22 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index bb98051..53f2bdd 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -3362,7 +3362,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, switch ( input ) { - unsigned int _ecx, _edx; + unsigned int _ebx, _ecx, _edx; case 0x1: /* Fix up VLAPIC details. */ @@ -3443,6 +3443,44 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, switch ( count ) { case 0: + { + uint64_t xfeature_mask = XSTATE_SSE | XSTATE_FP; + uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; + + if ( _ecx & cpufeat_mask(X86_FEATURE_AVX) ) + { + xfeature_mask |= XSTATE_YMM; + xstate_size += xstate_sizes[_XSTATE_YMM]; + } + + _ecx = 0; + hvm_cpuid(7, NULL, &_ebx, &_ecx, NULL); + + if ( _ebx & cpufeat_mask(X86_FEATURE_MPX) ) + { + xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR; + xstate_size += xstate_sizes[_XSTATE_BNDREGS] + + xstate_sizes[_XSTATE_BNDCSR]; + } + + if ( _ebx & cpufeat_mask(X86_FEATURE_PKU) ) + { + xfeature_mask |= XSTATE_PKRU; + xstate_size += xstate_sizes[_XSTATE_PKRU]; + } + + hvm_cpuid(0x80000001, NULL, NULL, &_ecx, NULL); + + if ( _ecx & cpufeat_mask(X86_FEATURE_LWP) ) + { + xfeature_mask |= XSTATE_LWP; + xstate_size += xstate_sizes[_XSTATE_LWP]; + } + + *eax = (uint32_t)xfeature_mask; + *edx = (uint32_t)(xfeature_mask >> 32); + *ecx = xstate_size; + /* * Always read CPUID[0xD,0].EBX from hardware, rather than domain * policy. It varies with enabled xstate, and the correct xcr0 is @@ -3450,6 +3488,8 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, */ cpuid_count(input, count, &dummy, ebx, &dummy, &dummy); break; + } + case 1: *eax &= hvm_featureset[FEATURESET_Da1]; @@ -3463,7 +3503,9 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, cpuid_count(input, count, &dummy, ebx, &dummy, &dummy); } else - *ebx = *ecx = *edx = 0; + *ebx = 0; + + *ecx = *edx = 0; break; } break; diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 5d7232d..4ba90a4 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -928,7 +928,7 @@ void pv_cpuid(struct cpu_user_regs *regs) switch ( leaf ) { - uint32_t tmp; + uint32_t tmp, _ebx, _ecx, _edx; case 0x00000001: c &= pv_featureset[FEATURESET_1c]; @@ -1087,19 +1087,63 @@ void pv_cpuid(struct cpu_user_regs *regs) break; case XSTATE_CPUID: - if ( !((!is_control_domain(currd) && !is_hardware_domain(currd) - ? ({ - uint32_t ecx; - - domain_cpuid(currd, 1, 0, &tmp, &tmp, &ecx, &tmp); - ecx & pv_featureset[FEATURESET_1c]; - }) - : cpuid_ecx(1)) & cpufeat_mask(X86_FEATURE_XSAVE)) || - subleaf >= 63 ) + + if ( !is_control_domain(currd) && !is_hardware_domain(currd) ) + domain_cpuid(currd, 1, 0, &tmp, &tmp, &_ecx, &tmp); + else + _ecx = cpuid_ecx(1); + _ecx &= pv_featureset[FEATURESET_1c]; + + if ( !(_ecx & cpufeat_mask(X86_FEATURE_XSAVE)) || subleaf >= 63 ) goto unsupported; switch ( subleaf ) { case 0: + { + uint64_t xfeature_mask = XSTATE_SSE | XSTATE_FP; + uint32_t xstate_size = XSTATE_AREA_MIN_SIZE; + + if ( _ecx & cpufeat_mask(X86_FEATURE_AVX) ) + { + xfeature_mask |= XSTATE_YMM; + xstate_size += xstate_sizes[_XSTATE_YMM]; + } + + if ( !is_control_domain(currd) && !is_hardware_domain(currd) ) + domain_cpuid(currd, 7, 0, &tmp, &_ebx, &tmp, &tmp); + else + cpuid_count(7, 0, &tmp, &_ebx, &tmp, &tmp); + _ebx &= pv_featureset[FEATURESET_7b0]; + + if ( _ebx & cpufeat_mask(X86_FEATURE_MPX) ) + { + xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR; + xstate_size += xstate_sizes[_XSTATE_BNDREGS] + + xstate_sizes[_XSTATE_BNDCSR]; + } + + if ( _ebx & cpufeat_mask(X86_FEATURE_PKU) ) + { + xfeature_mask |= XSTATE_PKRU; + xstate_size += xstate_sizes[_XSTATE_PKRU]; + } + + if ( !is_control_domain(currd) && !is_hardware_domain(currd) ) + domain_cpuid(currd, 0x80000001, 0, &tmp, &tmp, &tmp, &_edx); + else + _edx = cpuid_edx(0x80000001); + _edx &= pv_featureset[FEATURESET_e1d]; + + if ( _ecx & cpufeat_mask(X86_FEATURE_LWP) ) + { + xfeature_mask |= XSTATE_LWP; + xstate_size += xstate_sizes[_XSTATE_LWP]; + } + + a = (uint32_t)xfeature_mask; + d = (uint32_t)(xfeature_mask >> 32); + c = xstate_size; + /* * Always read CPUID.0xD[ECX=0].EBX from hardware, rather than * domain policy. It varies with enabled xstate, and the correct @@ -1108,6 +1152,7 @@ void pv_cpuid(struct cpu_user_regs *regs) if ( !is_control_domain(currd) && !is_hardware_domain(currd) ) cpuid_count(leaf, subleaf, &tmp, &b, &tmp, &tmp); break; + } case 1: a &= pv_featureset[FEATURESET_Da1]; diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h index 4535354..4b0c33e 100644 --- a/xen/include/asm-x86/xstate.h +++ b/xen/include/asm-x86/xstate.h @@ -26,16 +26,27 @@ #define XSAVE_HDR_OFFSET FXSAVE_SIZE #define XSTATE_AREA_MIN_SIZE (FXSAVE_SIZE + XSAVE_HDR_SIZE) -#define XSTATE_FP (1ULL << 0) -#define XSTATE_SSE (1ULL << 1) -#define XSTATE_YMM (1ULL << 2) -#define XSTATE_BNDREGS (1ULL << 3) -#define XSTATE_BNDCSR (1ULL << 4) -#define XSTATE_OPMASK (1ULL << 5) -#define XSTATE_ZMM (1ULL << 6) -#define XSTATE_HI_ZMM (1ULL << 7) -#define XSTATE_PKRU (1ULL << 9) -#define XSTATE_LWP (1ULL << 62) /* AMD lightweight profiling */ +#define _XSTATE_FP 0 +#define XSTATE_FP (1ULL << _XSTATE_FP) +#define _XSTATE_SSE 1 +#define XSTATE_SSE (1ULL << _XSTATE_SSE) +#define _XSTATE_YMM 2 +#define XSTATE_YMM (1ULL << _XSTATE_YMM) +#define _XSTATE_BNDREGS 3 +#define XSTATE_BNDREGS (1ULL << _XSTATE_BNDREGS) +#define _XSTATE_BNDCSR 4 +#define XSTATE_BNDCSR (1ULL << _XSTATE_BNDCSR) +#define _XSTATE_OPMASK 5 +#define XSTATE_OPMASK (1ULL << _XSTATE_OPMASK) +#define _XSTATE_ZMM 6 +#define XSTATE_ZMM (1ULL << _XSTATE_ZMM) +#define _XSTATE_HI_ZMM 7 +#define XSTATE_HI_ZMM (1ULL << _XSTATE_HI_ZMM) +#define _XSTATE_PKRU 9 +#define XSTATE_PKRU (1ULL << _XSTATE_PKRU) +#define _XSTATE_LWP 62 +#define XSTATE_LWP (1ULL << _XSTATE_LWP) + #define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE) #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | \ XSTATE_ZMM | XSTATE_HI_ZMM | XSTATE_NONLAZY) -- 2.1.4
_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel