On 1/11/2022 10:30 AM, Tian, Kevin wrote:
From: Zeng, Guang <guang.z...@intel.com> Sent: Monday, January 10, 2022 5:47 PM On 1/10/2022 4:40 PM, Tian, Kevin wrote: From: Zhong, Yang <yang.zh...@intel.com> Sent: Friday, January 7, 2022 5:32 PM From: Jing Liu <jing2....@intel.com> Extended feature has large state while current kvm_xsave only allows 4KB. Use new XSAVE ioctls if the xstate size is larger than kvm_xsave. shouldn't we always use the new xsave ioctls as long as CAP_XSAVE2 is available? CAP_XSAVE2 may return the legacy xsave size or 0 when working with an old kvm version in which it's not available. QEMU just uses the new xsave ioctls only when the return value of CAP_XSAVE2 is larger than the legacy xsave size. CAP_XSAVE2 is the superset of CAP_XSAVE. If available it can support both the legacy 4K size and bigger sizes.
Got your point now. We can use the new ioctl once CAP_XSAVE2 is available. As per your suggestion, I'd like to change the commit log as follows: "x86: Use new XSAVE ioctls handling Extended feature has large state while current kvm_xsave only allows 4KB. Use new XSAVE ioctls if the CAP_XSAVE2 extension is available." And introduce has_xsave2 to indicate the validity of CAP_XSAVE2 with the following change: diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 97520e9dff..c8dae88ced 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -116,6 +116,7 @@ static bool has_msr_ucode_rev; static bool has_msr_vmx_procbased_ctls2; static bool has_msr_perf_capabs; static bool has_msr_pkrs; +static bool has_xsave2 = false; static uint32_t has_architectural_pmu_version; static uint32_t num_architectural_pmu_gp_counters; @@ -1986,7 +1987,8 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t size = kvm_vm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
if (!size) { size = sizeof(struct kvm_xsave); - } + } else + has_xsave2 = true; env->xsave_buf_len = QEMU_ALIGN_UP(size, 4096); env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); @@ -3253,7 +3255,7 @@ static int kvm_get_xsave(X86CPU *cpu) return kvm_get_fpu(cpu); } - if (env->xsave_buf_len <= sizeof(struct kvm_xsave)) { + if (!has_xsave2) { ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); } else { ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE2, xsave);
Signed-off-by: Jing Liu <jing2....@intel.com> Signed-off-by: Zeng Guang <guang.z...@intel.com> Signed-off-by: Wei Wang <wei.w.w...@intel.com> Signed-off-by: Yang Zhong <yang.zh...@intel.com> --- linux-headers/asm-x86/kvm.h | 14 ++++++++++++++ linux-headers/linux/kvm.h | 2 ++ target/i386/cpu.h | 5 +++++ target/i386/kvm/kvm.c | 16 ++++++++++++++-- target/i386/xsave_helper.c | 35+++++++++++++++++++++++++++++++++++5 files changed, 70 insertions(+), 2 deletions(-) diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 5a776a08f7..32f2a921e8 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -376,6 +376,20 @@ struct kvm_debugregs { /* for KVM_CAP_XSAVE */ struct kvm_xsave { __u32 region[1024]; + /* + * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes + * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * respectively, when invoked on the vm file descriptor. + * + * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) + * will always be at least 4096. Currently, it is only greater + * than 4096 if a dynamic feature has been enabled with + * ``arch_prctl()``, but this may change in the future. + * + * The offsets of the state save areas in struct kvm_xsave follow + * the contents of CPUID leaf 0xD on the host. 
+ */ + __u32 extra[0]; }; #define KVM_MAX_XCRS 16 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 02c5e7b7bb..97d5b6d81d 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_XSAVE2 207 #ifdef KVM_CAP_IRQ_ROUTING @@ -1550,6 +1551,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_XSAVE */ #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) #define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 245e8b5a1a..6153c4ab1a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1519,6 +1519,11 @@ typedef struct CPUX86State { YMMReg zmmh_regs[CPU_NB_REGS]; ZMMReg hi16_zmm_regs[CPU_NB_REGS]; +#ifdef TARGET_X86_64 + uint8_t xtilecfg[64]; + uint8_t xtiledata[8192]; +#endif + /* sysenter registers */ uint32_t sysenter_cs; target_ulong sysenter_esp; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 3fb3ddbe2b..97520e9dff 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1983,7 +1983,12 @@ int kvm_arch_init_vcpu(CPUState *cs) } if (has_xsave) { - env->xsave_buf_len = sizeof(struct kvm_xsave); + uint32_t size = kvm_vm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + if (!size) { + size = sizeof(struct kvm_xsave); + } + + env->xsave_buf_len = QEMU_ALIGN_UP(size, 4096); env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); memset(env->xsave_buf, 0, env->xsave_buf_len); @@ -2580,6 +2585,7 @@ static int kvm_put_xsave(X86CPU *cpu) if (!has_xsave) { return kvm_put_fpu(cpu); } + x86_cpu_xsave_all_areas(cpu, xsave, env->xsave_buf_len); return 
kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); @@ -3247,10 +3253,16 @@ static int kvm_get_xsave(X86CPU *cpu) return kvm_get_fpu(cpu); } - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); + if (env->xsave_buf_len <= sizeof(struct kvm_xsave)) { + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); + } else { + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE2, xsave); + } + if (ret < 0) { return ret; } + x86_cpu_xrstor_all_areas(cpu, xsave, env->xsave_buf_len); return 0; diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c index ac61a96344..090424e820 100644 --- a/target/i386/xsave_helper.c +++ b/target/i386/xsave_helper.c @@ -5,6 +5,7 @@ #include "qemu/osdep.h" #include "cpu.h" +#include <asm/kvm.h> void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) { @@ -126,6 +127,23 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu,void*buf, uint32_t buflen) memcpy(pkru, &env->pkru, sizeof(env->pkru)); } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + XSaveXTILE_CFG *tilecfg = buf + e->offset; + + memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); + } + + if (buflen > sizeof(struct kvm_xsave)) { + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + + if (e->size && e->offset) { + XSaveXTILE_DATA *tiledata = buf + e->offset; + + memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); + } + } #endif } @@ -247,5 +265,22 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu,constvoid *buf, uint32_t buflen) pkru = buf + e->offset; memcpy(&env->pkru, pkru, sizeof(env->pkru)); } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + const XSaveXTILE_CFG *tilecfg = buf + e->offset; + + memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); + } + + if (buflen > sizeof(struct kvm_xsave)) { + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + + if (e->size && e->offset) { + const XSaveXTILE_DATA *tiledata = buf + e->offset; + + memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); + } + } 
#endif }