[PATCH v2] KVM: x86: Revise guest_fpu xcomp_bv field

2021-02-24 Thread Jing Liu
XCOMP_BV[63] field indicates that the save area is in the compacted
format and XCOMP_BV[62:0] indicates the states that have space allocated
in the save area, including both XCR0 and XSS bits enabled by the host
kernel. Use xfeatures_mask_all for calculating xcomp_bv and reuse
XCOMP_BV_COMPACTED_FORMAT defined by kernel.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/x86.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b404e4d7dd8..f115493f577d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4435,8 +4435,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct 
kvm_vcpu *vcpu,
return 0;
 }
 
-#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
-
 static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 {
struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
@@ -4494,7 +4492,8 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
/* Set XSTATE_BV and possibly XCOMP_BV.  */
xsave->header.xfeatures = xstate_bv;
if (boot_cpu_has(X86_FEATURE_XSAVES))
-   xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+   xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+xfeatures_mask_all;
 
/*
 * Copy each region from the non-compacted offset to the
@@ -9912,9 +9911,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
return;
 
fpstate_init(&vcpu->arch.guest_fpu->state);
-   if (boot_cpu_has(X86_FEATURE_XSAVES))
-   vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
-   host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
/*
 * Ensure guest xcr0 is valid for loading
-- 
2.18.4



[PATCH v1] kvm: x86: Revise guest_fpu xcomp_bv field

2021-02-07 Thread Jing Liu
Bit 63 of the XCOMP_BV field indicates that the save area is in the
compacted format and the remaining bits indicate the states that have
space allocated in the save area, not only user states. Since
fpstate_init() has initialized xcomp_bv, let's just use that.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/x86.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b404e4d7dd8..f115493f577d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4435,8 +4435,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct 
kvm_vcpu *vcpu,
return 0;
 }
 
-#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
-
 static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 {
struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
@@ -4494,7 +4492,8 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
/* Set XSTATE_BV and possibly XCOMP_BV.  */
xsave->header.xfeatures = xstate_bv;
if (boot_cpu_has(X86_FEATURE_XSAVES))
-   xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+   xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+xfeatures_mask_all;
 
/*
 * Copy each region from the non-compacted offset to the
@@ -9912,9 +9911,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
return;
 
fpstate_init(&vcpu->arch.guest_fpu->state);
-   if (boot_cpu_has(X86_FEATURE_XSAVES))
-   vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
-   host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
/*
 * Ensure guest xcr0 is valid for loading
-- 
2.18.4



[PATCH RFC 5/7] kvm: x86: Revise CPUID.D.1.EBX for alignment rule

2021-02-06 Thread Jing Liu
CPUID.0xD.1.EBX[1] is set if, when the compacted format of an XSAVE
area is used, this extended state component is located on the next
64-byte boundary following the preceding state component (otherwise,
it is located immediately following the preceding state component).

AMX tileconfig and tiledata are the first to use 64B alignment.
Revise the runtime cpuid modification for this rule.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/cpuid.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 04a73c395c71..ee1fac0a865e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -35,12 +35,17 @@ static u32 xstate_required_size(u64 xstate_bv, bool 
compacted)
 {
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+   bool is_aligned = false;
 
xstate_bv &= XFEATURE_MASK_EXTEND;
while (xstate_bv) {
if (xstate_bv & 0x1) {
u32 eax, ebx, ecx, edx, offset;
cpuid_count(0xD, feature_bit, , , , );
+   /* ECX[2]: 64B alignment in compacted form */
+   is_aligned = !!(ecx & 2);
+   if (is_aligned && compacted)
+   ret = ALIGN(ret, 64);
offset = compacted ? ret : ebx;
ret = max(ret, offset + eax);
}
-- 
2.18.4



[PATCH RFC 7/7] kvm: x86: AMX XCR0 support for guest

2021-02-06 Thread Jing Liu
Two XCR0 bits are defined for AMX to support XSAVE mechanism.
Bit 17 is for tilecfg and bit 18 is for tiledata.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/x86.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfbde877221e..f1c5893dee18 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -189,7 +189,7 @@ static struct kvm_user_return_msrs __percpu 
*user_return_msrs;
 #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
-   | XFEATURE_MASK_PKRU)
+   | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
 
 u64 __read_mostly host_efer;
 EXPORT_SYMBOL_GPL(host_efer);
@@ -946,6 +946,12 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, 
u64 xcr)
if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
return 1;
}
+
+   if (xcr0 & XFEATURE_MASK_XTILE) {
+   if ((xcr0 & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE)
+   return 1;
+   }
+
vcpu->arch.xcr0 = xcr0;
 
if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
-- 
2.18.4



[PATCH RFC 4/7] kvm: x86: Add new ioctls for XSAVE extension

2021-02-06 Thread Jing Liu
The static xstate buffer kvm_xsave contains the extended register
states, but it is not enough for dynamic features with large state.

Introduce a new capability called KVM_CAP_X86_XSAVE_EXTENSION to
detect if hardware has XSAVE extension (XFD). Meanwhile, add two
new ioctl interfaces to get/set the whole xstate using struct
kvm_xsave_extension buffer containing both static and dynamic
xfeatures. Reuse fill_xsave and load_xsave for both cases.

Signed-off-by: Jing Liu 
---
 arch/x86/include/uapi/asm/kvm.h |  5 +++
 arch/x86/kvm/x86.c  | 70 +
 include/uapi/linux/kvm.h|  8 
 3 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 89e5f3d1bba8..bf785e89a728 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -362,6 +362,11 @@ struct kvm_xsave {
__u32 region[1024];
 };
 
+/* for KVM_CAP_XSAVE_EXTENSION */
+struct kvm_xsave_extension {
+   __u32 region[3072];
+};
+
 #define KVM_MAX_XCRS   16
 
 struct kvm_xcr {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15908bc65d1c..bfbde877221e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3786,6 +3786,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
ext)
case KVM_CAP_XCRS:
r = boot_cpu_has(X86_FEATURE_XSAVE);
break;
+   case KVM_CAP_X86_XSAVE_EXTENSION:
+   r = boot_cpu_has(X86_FEATURE_XSAVE) &&
+   boot_cpu_has(X86_FEATURE_XFD);
+   break;
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
break;
@@ -4395,7 +4399,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct 
kvm_vcpu *vcpu,
 
 #define XSTATE_COMPACTION_ENABLED (1ULL << 63)
 
-static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu, bool has_extension)
 {
struct xregs_state *xsave;
struct fpu *guest_fpu;
@@ -4403,9 +4407,14 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
u64 valid;
 
guest_fpu = vcpu->arch.guest_fpu;
-   xsave = &guest_fpu->state.xsave;
+   xsave = __xsave(guest_fpu);
xstate_bv = xsave->header.xfeatures;
 
+   if (!has_extension) {
+   /* truncate with only non-dynamic features */
+   xstate_bv = xstate_bv & ~xfeatures_mask_user_dynamic;
+   }
+
/*
 * Copy legacy XSAVE area, to avoid complications with CPUID
 * leaves 0 and 1 in the loop below.
@@ -4450,7 +4459,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
u64 valid;
 
guest_fpu = vcpu->arch.guest_fpu;
-   xsave = &guest_fpu->state.xsave;
+   xsave = __xsave(guest_fpu);
 
/*
 * Copy legacy XSAVE area, to avoid complications with CPUID
@@ -4488,29 +4497,31 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
}
 }
 
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-struct kvm_xsave *guest_xsave)
+static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, u32 *region, 
bool has_extension)
 {
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
-   memset(guest_xsave, 0, sizeof(struct kvm_xsave));
-   fill_xsave((u8 *) guest_xsave->region, vcpu);
+   if (has_extension)
+   memset(region, 0, sizeof(struct kvm_xsave_extension));
+   else
+   memset(region, 0, sizeof(struct kvm_xsave));
+
+   fill_xsave((u8 *)region, vcpu, has_extension);
} else {
-   memcpy(guest_xsave->region,
+   memcpy(region,
&vcpu->arch.guest_fpu->state.fxsave,
sizeof(struct fxregs_state));
-   *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
+   *(u64 *)&region[XSAVE_HDR_OFFSET / sizeof(u32)] =
XFEATURE_MASK_FPSSE;
}
 }
 
 #define XSAVE_MXCSR_OFFSET 24
 
-static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
-   struct kvm_xsave *guest_xsave)
+static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, u32 *region)
 {
u64 xstate_bv =
-   *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
-   u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / 
sizeof(u32)];
+   *(u64 *)&region[XSAVE_HDR_OFFSET / sizeof(u32)];
+   u32 mxcsr = *(u32 *)&region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
@@ -4520,13 +4531,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu 
*vcpu,
 */
if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
return -EINVAL;

[PATCH RFC 6/7] kvm: x86: Add AMX_TILE, AMX_INT8 and AMX_BF16 support

2021-02-06 Thread Jing Liu
Intel introduces AMX architecture in SPR platform, which includes
AMX_TILE, AMX_INT8 and AMX_BF16 support.

Exposes these features to KVM guest.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/cpuid.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ee1fac0a865e..1b3ea9195a75 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -423,7 +423,8 @@ void kvm_set_cpu_caps(void)
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
-   F(SERIALIZE) | F(TSXLDTRK)
+   F(SERIALIZE) | F(TSXLDTRK) |
+   F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)
);
 
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -544,6 +545,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct 
kvm_cpuid_array *array,
case 0x14:
case 0x17:
case 0x18:
+   case 0x1d:
+   case 0x1e:
case 0x1f:
case 0x8000001d:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -667,6 +670,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array 
*array, u32 function)
break;
case 9:
break;
+   case 0x1e:
+   break;
case 0xa: { /* Architectural Performance Monitoring */
struct x86_pmu_capability cap;
union cpuid10_eax eax;
@@ -766,9 +771,12 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array 
*array, u32 function)
entry->edx = 0;
}
break;
+   /* Intel AMX TILE */
+   case 0x1d:
/* Intel PT */
case 0x14:
-   if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) {
+   if ((function == 0x14 && 
!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) ||
+   (function == 0x1d && 
!kvm_cpu_cap_has(X86_FEATURE_AMX_TILE))) {
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
}
-- 
2.18.4



[PATCH RFC 1/7] kvm: x86: Expose XFD CPUID to guest

2021-02-06 Thread Jing Liu
Intel's Extended Feature Disable (XFD) feature is an extension
to the XSAVE feature that allows an operating system to enable
a feature while preventing specific user threads from using
the feature. A processor that supports XFD enumerates
CPUID.(EAX=0DH,ECX=1):EAX[4] as 1.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 83637a2ff605..04a73c395c71 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -437,7 +437,7 @@ void kvm_set_cpu_caps(void)
);
 
kvm_cpu_cap_mask(CPUID_D_1_EAX,
-   F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
+   F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | F(XFD)
);
 
kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
-- 
2.18.4



[PATCH RFC 3/7] kvm: x86: XSAVE state and XFD MSRs context switch

2021-02-06 Thread Jing Liu
XFD allows the kernel to enable a feature state in XCR0 and to
receive a #NM trap when a task uses instructions accessing that state.
Kernel defines "struct fpu.state_mask" to indicate the saved xstate and
interact with the XFD hardware when needed via a simple conversion.
Once a dynamic feature is detected, "state_mask" is expanded and
"state_ptr" is dynamically allocated to hold the whole state. Meanwhile
once the state is not in INIT state, the corresponding XFD bit should
not be armed anymore.

In KVM, "guest_fpu" serves for any guest task working on this vcpu
during vmexit and vmenter. We provide a pre-allocated guest_fpu space
and entire "guest_fpu.state_mask" to avoid each dynamic features
detection on each vcpu task. Meanwhile, to ensure correctly
xsaves/xrstors guest state, set IA32_XFD as zero during vmexit and
vmenter.

For "current->thread.fpu", since host and guest probably have different
state and mask, it also need be switched to the right context when fpu
load and put.

Signed-off-by: Jing Liu 
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kernel/fpu/init.c  |  1 +
 arch/x86/kernel/fpu/xstate.c|  2 +
 arch/x86/kvm/vmx/vmx.c  | 76 +
 arch/x86/kvm/vmx/vmx.h  |  1 +
 arch/x86/kvm/x86.c  | 69 +-
 6 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e5f33a0d0e2..6dedf3d22659 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1203,6 +1203,9 @@ struct kvm_x86_ops {
   struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
+   void (*xfd_load)(struct kvm_vcpu *vcpu);
+   void (*xfd_put)(struct kvm_vcpu *vcpu);
+
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
 
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7e0c68043ce3..fbb761fc13ec 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -145,6 +145,7 @@ EXPORT_SYMBOL_GPL(fpu_kernel_xstate_min_size);
  * can be dynamically expanded to include some states up to this size.
  */
 unsigned int fpu_kernel_xstate_max_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_max_size);
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 080f3be9a5e6..9c471a0364e2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -77,12 +77,14 @@ static struct xfeature_capflag_info xfeature_capflags[] 
__initdata = {
  * XSAVE buffer, both supervisor and user xstates.
  */
 u64 xfeatures_mask_all __read_mostly;
+EXPORT_SYMBOL_GPL(xfeatures_mask_all);
 
 /*
  * This represents user xstates, a subset of xfeatures_mask_all, saved in a
  * dynamic kernel XSAVE buffer.
  */
 u64 xfeatures_mask_user_dynamic __read_mostly;
+EXPORT_SYMBOL_GPL(xfeatures_mask_user_dynamic);
 
 static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] 
= -1};
 static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] 
= -1};
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7fa54e78c45c..be3cc0f3ec6d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1167,6 +1167,75 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
 }
 
+static void vmx_xfd_load(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (guest_cpuid_has(vcpu, X86_FEATURE_XFD)) {
+   vmx->host_ia32_xfd = 
xfirstuse_not_detected(vcpu->arch.user_fpu);
+   /*
+* Keep IA32_XFD as zero in hypervisor.
+* Guest non-zero IA32_XFD is restored until kvm_x86_ops.run
+*/
+   if (vmx->host_ia32_xfd)
+   wrmsrl(MSR_IA32_XFD, 0);
+   }
+}
+
+static void vmx_xfd_put(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (guest_cpuid_has(vcpu, X86_FEATURE_XFD)) {
+   /* IA32_XFD register is kept as zero in hypervisor. */
+   if (vmx->host_ia32_xfd)
+   wrmsrl(MSR_IA32_XFD, vmx->host_ia32_xfd);
+   /* User (qemu) IA32_XFD_ERR should be zero. */
+   if (vmx->msr_ia32_xfd_err)
+   wrmsrl(MSR_IA32_XFD_ERR, 0);
+   }
+}
+
+/* Load guest XFD MSRs before entering. */
+static void xfd_guest_enter(struct vcpu_vmx *vmx)
+{
+   if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_XFD)) {
+   if (vmx->msr_ia32_xfd)
+   wrmsrl(MSR_IA32_XFD, vmx->msr_ia32_xfd);
+   /*
+* We do n

[PATCH RFC 2/7] kvm: x86: Introduce XFD MSRs as passthrough to guest

2021-02-06 Thread Jing Liu
XFD feature introduces two new MSRs: IA32_XFD and IA32_XFD_ERR.
Each of the MSRs contains a state-component bitmap. XFD is enabled
for state component i if XCR0[i] = IA32_XFD[i] = 1. When XFD is
enabled for a state component, any instruction that would access
that state component does not execute and instead generates an
device-not-available exception (#NM). IA32_XFD_ERR is for
indicating which state causes the #NM event.

The MSRs are per task and need be context switched between host
and guest, and also between tasks inside guest just as native.
Passthrough both MSRs to let guest access and write without
vmexit. Add two slots for XFD MSRs as desired passthrough MSRs.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/vmx/vmx.c | 38 ++
 arch/x86/kvm/vmx/vmx.h |  6 +-
 arch/x86/kvm/x86.c |  6 ++
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 47b8357b9751..7fa54e78c45c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -162,6 +162,8 @@ static u32 
vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SYSENTER_CS,
MSR_IA32_SYSENTER_ESP,
MSR_IA32_SYSENTER_EIP,
+   MSR_IA32_XFD,
+   MSR_IA32_XFD_ERR,
MSR_CORE_C1_RES,
MSR_CORE_C3_RESIDENCY,
MSR_CORE_C6_RESIDENCY,
@@ -1824,6 +1826,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 
msr_info->data = vmx->msr_ia32_umwait_control;
break;
+   case MSR_IA32_XFD:
+   if (!msr_info->host_initiated)
+   return 1;
+
+   msr_info->data = vmx->msr_ia32_xfd;
+   break;
+   case MSR_IA32_XFD_ERR:
+   if (!msr_info->host_initiated)
+   return 1;
+
+   msr_info->data = vmx->msr_ia32_xfd_err;
+   break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2026,6 +2040,20 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 
vmx->msr_ia32_umwait_control = data;
break;
+   case MSR_IA32_XFD:
+   if (!msr_info->host_initiated)
+   return 1;
+
+   vmx->msr_ia32_xfd = data;
+   break;
+   case MSR_IA32_XFD_ERR:
+   if (!msr_info->host_initiated)
+   return 1;
+   if (data)
+   break;
+
+   vmx->msr_ia32_xfd_err = data;
+   break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -7219,6 +7247,12 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
 }
 
+static void vmx_update_intercept_xfd(struct kvm_vcpu *vcpu)
+{
+   vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD, MSR_TYPE_RW, false);
+   vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_RW, false);
+}
+
 static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7249,6 +7283,10 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu 
*vcpu)
guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
update_intel_pt_cfg(vcpu);
 
+   if (boot_cpu_has(X86_FEATURE_XFD) &&
+   guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+   vmx_update_intercept_xfd(vcpu);
+
if (boot_cpu_has(X86_FEATURE_RTM)) {
struct vmx_uret_msr *msr;
msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index f6f66e5c6510..d487f5a53a08 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -281,11 +281,15 @@ struct vcpu_vmx {
struct pt_desc pt_desc;
 
/* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS  13
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS  15
struct {
DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
} shadow_msr_intercept;
+
+   /* eXtended Feature Disabling (XFD) MSRs */
+   u64 msr_ia32_xfd;
+   u64 msr_ia32_xfd_err;
 };
 
 enum ept_pointers_status {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 93b5bacad67a..9ca8b1e58afa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1213,6 +1213,7 @@ static const u32 msrs_to_save_all[] = {
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONT

[PATCH RFC 0/7] Introduce support for guest AMX feature

2021-02-06 Thread Jing Liu
Intel introduces Advanced Matrix Extensions (AMX) [1] feature that
will be shipping soon. AMX consists of configurable two-dimensional
"TILE" registers and new accelerator instructions that operate on them.
TMUL (Tile matrix MULtiply) is the first accelerator instruction set
to use the new registers.

Intel AMX is XSAVE supported and XSAVE enabled. It is associated with
two state components, XTILECFG and XTILEDATA. The XTILEDATA state
component is very large so an XSAVE extension called extended feature
disable (XFD) is introduced to support dynamic usage. When XFD is
enabled for a state component, any instruction that would access
that state component does not execute and instead generates an #NM.
So Linux kernel arms XFD to monitor the first usage of AMX.

This patchset adds AMX and XFD support for guest: providing related
CPUID and MSRs to guest, adding extended XSAVE state context switch and
XFD MSRs switch during vmenter/vmexit. 

This RFC series is based on kernel AMX series v3 [2] in LKML though not
latest upstream commit and we'd looking forward for your comments.

[1]: Intel Architecture Instruction Set Extension Programming Reference

https://software.intel.com/content/dam/develop/external/us/en/documents/architecture-instruction-set-extensions-programming-reference.pdf

[2]: AMX kernel series v3 https://lkml.org/lkml/2020/12/23/464


Jing Liu (7):
  kvm: x86: Expose XFD CPUID to guest
  kvm: x86: Introduce XFD MSRs as passthrough to guest
  kvm: x86: Dynamic XSAVE and XFD MSRs context switch
  kvm: x86: Add new ioctls for XSAVE extension
  kvm: x86: Revise CPUID.D.1.EBX for alignment rule
  kvm: x86: Add AMX_TILE, AMX_INT8 and AMX_BF16 support
  kvm: x86: AMX XCR0 support for guest

 arch/x86/include/asm/kvm_host.h |   3 +
 arch/x86/include/uapi/asm/kvm.h |   5 ++
 arch/x86/kernel/fpu/init.c  |   1 +
 arch/x86/kernel/fpu/xstate.c|   2 +
 arch/x86/kvm/cpuid.c|  19 +++-
 arch/x86/kvm/vmx/vmx.c  | 114 
 arch/x86/kvm/vmx/vmx.h  |   7 +-
 arch/x86/kvm/x86.c  | 153 ++--
 include/uapi/linux/kvm.h|   8 ++
 9 files changed, 279 insertions(+), 33 deletions(-)

-- 
2.18.4



Re: [PATCH v1] KVM: x86: expose AVX512_BF16 feature to guest

2019-07-15 Thread Jing Liu




On 7/15/2019 2:06 PM, Wanpeng Li wrote:

On Sat, 13 Jul 2019 at 18:40, Paolo Bonzini  wrote:


On 11/07/19 07:49, Jing Liu wrote:

AVX512 BFLOAT16 instructions support 16-bit BFLOAT16 floating-point
format (BF16) for deep learning optimization.

Intel adds AVX512 BFLOAT16 feature in CooperLake, which is CPUID.7.1.EAX[5].

Detailed information of the CPUID bit can be found here,
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf.

Signed-off-by: Jing Liu 
---


[...]

/home/kernel/data/kvm/arch/x86/kvm//cpuid.c: In function ‘do_cpuid_7_mask’:
./include/linux/kernel.h:819:29: warning: comparison of distinct
pointer types lacks a cast
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
  ^
./include/linux/kernel.h:833:4: note: in expansion of macro ‘__typecheck’
(__typecheck(x, y) && __no_side_effects(x, y))
 ^
./include/linux/kernel.h:843:24: note: in expansion of macro ‘__safe_cmp’
   __builtin_choose_expr(__safe_cmp(x, y), \
 ^
./include/linux/kernel.h:852:19: note: in expansion of macro ‘__careful_cmp’
  #define min(x, y) __careful_cmp(x, y, <)
^
/home/kernel/data/kvm/arch/x86/kvm//cpuid.c:377:16: note: in expansion
of macro ‘min’
entry->eax = min(entry->eax, 1);
 ^


Thanks for the information.

This warning would be fixed by changing to
entry->eax = min(entry->eax, (u32)1);

@Paolo, sorry for trouble. Would you mind if I re-send?

Jing



Re: [PATCH v1] KVM: x86: expose AVX512_BF16 feature to guest

2019-07-14 Thread Jing Liu

Hi Paolo,
Thanks for your reviewing! There also has Qemu patch sent here,
https://www.mail-archive.com/qemu-devel@nongnu.org/msg630359.html

Could you please review that? Thanks very much!

Jing


On 7/13/2019 6:37 PM, Paolo Bonzini wrote:

On 11/07/19 07:49, Jing Liu wrote:

AVX512 BFLOAT16 instructions support 16-bit BFLOAT16 floating-point
format (BF16) for deep learning optimization.

Intel adds AVX512 BFLOAT16 feature in CooperLake, which is CPUID.7.1.EAX[5].

Detailed information of the CPUID bit can be found here,
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf.

Signed-off-by: Jing Liu 
---

This patch depends on kernel patch https://lkml.org/lkml/2019/6/19/912
and Paolo's patch set https://lkml.org/lkml/2019/7/4/468.

  arch/x86/kvm/cpuid.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8fc6039..0c125dd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -358,9 +358,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 
*entry, int index)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR);
  
+	/* cpuid 7.1.eax */

+   const u32 kvm_cpuid_7_1_eax_x86_features =
+   F(AVX512_BF16);
+
switch (index) {
case 0:
-   entry->eax = 0;
+   entry->eax = min(entry->eax, 1);
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
/* TSC_ADJUST is emulated */
@@ -384,6 +388,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 
*entry, int index)
 */
entry->edx |= F(ARCH_CAPABILITIES);
break;
+   case 1:
+   entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+   entry->ebx = 0;
+   entry->ecx = 0;
+   entry->edx = 0;
+   break;
default:
WARN_ON_ONCE(1);
entry->eax = 0;



Queued, thanks.

Paolo



[PATCH v1] KVM: x86: expose AVX512_BF16 feature to guest

2019-07-10 Thread Jing Liu
AVX512 BFLOAT16 instructions support 16-bit BFLOAT16 floating-point
format (BF16) for deep learning optimization.

Intel adds AVX512 BFLOAT16 feature in CooperLake, which is CPUID.7.1.EAX[5].

Detailed information of the CPUID bit can be found here,
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf.

Signed-off-by: Jing Liu 
---

This patch depends on kernel patch https://lkml.org/lkml/2019/6/19/912
and Paolo's patch set https://lkml.org/lkml/2019/7/4/468.

 arch/x86/kvm/cpuid.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8fc6039..0c125dd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -358,9 +358,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 
*entry, int index)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR);
 
+   /* cpuid 7.1.eax */
+   const u32 kvm_cpuid_7_1_eax_x86_features =
+   F(AVX512_BF16);
+
switch (index) {
case 0:
-   entry->eax = 0;
+   entry->eax = min(entry->eax, 1);
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
/* TSC_ADJUST is emulated */
@@ -384,6 +388,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 
*entry, int index)
 */
entry->edx |= F(ARCH_CAPABILITIES);
break;
+   case 1:
+   entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+   entry->ebx = 0;
+   entry->ecx = 0;
+   entry->edx = 0;
+   break;
default:
WARN_ON_ONCE(1);
entry->eax = 0;
-- 
1.8.3.1



Re: [PATCH 2/5] KVM: cpuid: extract do_cpuid_7_mask and support multiple subleafs

2019-07-10 Thread Jing Liu




On 7/10/2019 2:30 PM, Paolo Bonzini wrote:

On 08/07/19 09:07, Jing Liu wrote:



And when adding subleaf 1, plan to add codes,

case 1:
 entry->eax |= kvm_cpuid_7_1_eax_x86_features;
 entry->ebx = entry->ecx = entry->edx =0;
 break;

What do you think?


This should be "&=", not "|=".  Otherwise yes, that's the idea.



Yes! So let me send out the BFloat16 patch based on your patch set now
or you have merge plan soon?

Thanks,
Jing


Paolo



Re: [PATCH 5/5] KVM: cpuid: remove has_leaf_count from struct kvm_cpuid_param

2019-07-08 Thread Jing Liu

Hi Paolo,

On 7/4/2019 10:07 PM, Paolo Bonzini wrote:

The has_leaf_count member was originally added for KVM's paravirtualization
CPUID leaves.  However, since then the leaf count _has_ been added to those
leaves as well, so we can drop that special case.

Signed-off-by: Paolo Bonzini 

[...]

@@ -835,11 +834,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
int limit, nent = 0, r = -E2BIG, i;
u32 func;
static const struct kvm_cpuid_param param[] = {
-   { .func = 0, .has_leaf_count = true },
-   { .func = 0x80000000, .has_leaf_count = true },
-   { .func = 0xC0000000, .qualifier = is_centaur_cpu, 
.has_leaf_count = true },
+   { .func = 0 },
+   { .func = 0x80000000 },
+   { .func = 0xC0000000, .qualifier = is_centaur_cpu },



{ .func = KVM_CPUID_SIGNATURE },
-   { .func = KVM_CPUID_FEATURES },


It seems the two func are introduced by 2b5e97e, as paravirtual cpuid.
But when searching KVM_CPUID_SIGNATURE, there seems no caller requesting
this cpuid. Meanwhile, I felt curious if KVM_CPUID_FEATURES is still in 
use but it seems kvm_update_cpuid() uses that. Not sure which spec 
introduces the latest pv cpuid.


Thanks,
Jing

[...]


Re: [PATCH 2/5] KVM: cpuid: extract do_cpuid_7_mask and support multiple subleafs

2019-07-08 Thread Jing Liu

Hi Paolo,

Thank you for refining the cpuid codes especially for case 7! It looks
much clear now!

On 7/4/2019 10:07 PM, Paolo Bonzini wrote:

CPUID function 7 has multiple subleafs.  Instead of having nested
switch statements, move the logic to filter supported features to
a separate function, and call it for each subleaf.

Signed-off-by: Paolo Bonzini 
---
Here you would have something like entry->eax = min(entry->eax, 1)
when adding subleaf 1.

  arch/x86/kvm/cpuid.c | 128 +++
  1 file changed, 81 insertions(+), 47 deletions(-)


[...]

+
+   switch (index) {
+   case 0:
+   entry->eax = 0;


Here, mark: when adding subleaf 1, change to
entry->eax = min(entry->eax, 1).


+   entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
+   cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
+   /* TSC_ADJUST is emulated */
+   entry->ebx |= F(TSC_ADJUST);
+
+   entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
+   f_la57 = entry->ecx & F(LA57);
+   cpuid_mask(&entry->ecx, CPUID_7_ECX);
+   /* Set LA57 based on hardware capability. */
+   entry->ecx |= f_la57;
+   entry->ecx |= f_umip;
+   /* PKU is not yet implemented for shadow paging. */
+   if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+   entry->ecx &= ~F(PKU);
+
+   entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+   cpuid_mask(&entry->edx, CPUID_7_EDX);
+   /*
+* We emulate ARCH_CAPABILITIES in software even
+* if the host doesn't support it.
+*/
+   entry->edx |= F(ARCH_CAPABILITIES);
+   break;

And when adding subleaf 1, plan to add codes,

case 1:
entry->eax |= kvm_cpuid_7_1_eax_x86_features;
entry->ebx = entry->ecx = entry->edx =0;
break;

What do you think?


+   default:
+   WARN_ON_ONCE(1);
+   entry->eax = 0;
+   entry->ebx = 0;
+   entry->ecx = 0;
+   entry->edx = 0;
+   break;
+   }
+}
+
  static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 
function,
  int *nent, int maxnent)

[...]

+   /* function 7 has additional index. */
case 7: {
+   int i;
+
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-   entry->eax = 0;
-   /* Mask ebx against host capability word 9 */
-   entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
-   cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
-   // TSC_ADJUST is emulated
-   entry->ebx |= F(TSC_ADJUST);
-   entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
-   f_la57 = entry->ecx & F(LA57);
-   cpuid_mask(&entry->ecx, CPUID_7_ECX);
-   /* Set LA57 based on hardware capability. */
-   entry->ecx |= f_la57;
-   entry->ecx |= f_umip;
-   /* PKU is not yet implemented for shadow paging. */
-   if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
-   entry->ecx &= ~F(PKU);
-   entry->edx &= kvm_cpuid_7_0_edx_x86_features;
-   cpuid_mask(&entry->edx, CPUID_7_EDX);
-   /*
-* We emulate ARCH_CAPABILITIES in software even
-* if the host doesn't support it.
-*/
-   entry->edx |= F(ARCH_CAPABILITIES);
+   for (i = 0; ; ) {
+   do_cpuid_7_mask(&entry[i], i);
+   if (i == entry->eax)
+   break;
+   if (*nent >= maxnent)
+   goto out;
+
+   ++i;
+   do_cpuid_1_ent(&entry[i], function, i);
+   entry[i].flags |=
+  KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+   ++*nent;
+   }


The new logic is great and adding subleaf support would be much easier!


break;
}
case 9:



Thanks,
Jing


Re: [PATCH 1/5] KVM: cpuid: do_cpuid_ent works on a whole CPUID function

2019-07-08 Thread Jing Liu

Hi Paolo,

On 7/4/2019 10:07 PM, Paolo Bonzini wrote:

Rename it as well as __do_cpuid_ent and __do_cpuid_ent_emulated to have
"func" in its name, and drop the index parameter which is always 0.

Signed-off-by: Paolo Bonzini 
---
  arch/x86/kvm/cpuid.c | 89 +---
  1 file changed, 42 insertions(+), 47 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 004cbd84c351..ddffc56c39b4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -294,14 +294,19 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
  {
entry->function = function;
entry->index = index;
+   entry->flags = 0;
+


I'm wondering if we need set entry->flags = 0 here?
entry->flags was initialized as zero when vzalloc.


cpuid_count(entry->function, entry->index,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
-   entry->flags = 0;
  }
  
-static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,

-  u32 func, u32 index, int *nent, int maxnent)
+static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
+   u32 func, int *nent, int maxnent)
  {
+   entry->function = func;
+   entry->index = 0;
+   entry->flags = 0;
+


The same question for flags and index, because entry is allocated
by vzalloc.


switch (func) {
case 0:
entry->eax = 7;
@@ -313,21 +318,18 @@ static int __do_cpuid_ent_emulated(struct 
kvm_cpuid_entry2 *entry,
break;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-   if (index == 0)
-   entry->ecx = F(RDPID);
+   entry->eax = 0;
+   entry->ecx = F(RDPID);
++*nent;
default:
break;
}
  
-	entry->function = func;

-   entry->index = index;
-
return 0;
  }
  



Thanks,
Jing


Re: [PATCH RFC] kvm: x86: Expose AVX512_BF16 feature to guest

2019-06-24 Thread Jing Liu

Hi Paolo,

On 6/24/2019 4:33 PM, Paolo Bonzini wrote:

On 24/06/19 05:10, Jing Liu wrote:

What do you think about @index in current function? Does it mean, we
need put cpuid from index to max subleaf to @entry[i]? If so, the logic
seems as follows,

if (index == 0) {
  // Put subleaf 0 into @entry
  // Put subleaf 1 into @entry[1]
} else if (index < entry->eax) {
  // Put subleaf 1 into @entry
} else {
  // Put all zero into @entry
}

But this seems not identical with other cases, for current caller
function. Or we can simply ignore @index in 0x07 and just put all
possible subleaf info back?


There are indeed quite some cleanups to be made there.  Let me post a
series as soon as possible, and you can base your work on it.



Thanks. I just had another mail (replying you in this serial) appending
some codes to deal with case 7. If you prefer to firstly cleanup, I can
wait for the patch then. :)

Thanks,
Jing


Paolo



Re: [PATCH RFC] kvm: x86: Expose AVX512_BF16 feature to guest

2019-06-23 Thread Jing Liu

Hi Paolo,

After thinking more, I found way to satisfy all cases in a easy way.
How about things like this?

@@ -507,12 +510,26 @@ static inline int __do_cpuid_ent(struct 
kvm_cpuid_entry2 *entry, u32 function,

 * if the host doesn't support it.
 */
entry->edx |= F(ARCH_CAPABILITIES);
+   } else if (index == 1) {
+   entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+   entry->ebx = 0;
+   entry->ecx = 0;
+   entry->edx = 0;
} else {
+   entry->eax = 0;
entry->ebx = 0;
entry->ecx = 0;
entry->edx = 0;
}
-   entry->eax = 0;
+
+   if (index == 0 && entry->eax >= 1) {
+   entry[1].eax &= kvm_cpuid_7_1_eax_x86_features;
+   entry[1].ebx = 0;
+   entry[1].ecx = 0;
+   entry[1].edx = 0;
+   entry[1].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+   ++*nent;
+   }
break;
}


Or you prefer that I update this into another version later?

Thanks!
Jing

On 6/20/2019 11:09 PM, Liu, Jing2 wrote:

Hi Paolo,

On 6/20/2019 8:16 PM, Paolo Bonzini wrote:

On 20/06/19 13:21, Jing Liu wrote:

+    for (i = 1; i <= times; i++) {
+    if (*nent >= maxnent)
+    goto out;
+    do_cpuid_1_ent(&entry[i], function, i);
+    entry[i].eax &= F(AVX512_BF16);
+    entry[i].ebx = 0;
+    entry[i].ecx = 0;
+    entry[i].edx = 0;
+    entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+    ++*nent;


This woud be wrong for i > 1, so instead make this

if (entry->eax >= 1)



I am confused about the @index parameter. @index seems not used for
every case except 0x07. Since the caller function only has @index=0, so
all other cases except 0x07 put cpuid info from subleaf=0 to max subleaf.

What do you think about @index in current function? Does it mean, we
need put cpuid from index to max subleaf to @entry[i]? If so, the logic
seems as follows,

if (index == 0) {
     // Put subleaf 0 into @entry
     // Put subleaf 1 into @entry[1]
} else if (index < entry->eax) {
     // Put subleaf 1 into @entry
} else {
     // Put all zero into @entry
}

But this seems not identical with other cases, for current caller
function. Or we can simply ignore @index in 0x07 and just put all possible
subleaf info back?


and define F(AVX512_BF16) as a new constant kvm_cpuid_7_1_eax_features.


Got it.


Thanks,
Jing


Paolo



[PATCH RFC] kvm: x86: Expose AVX512_BF16 feature to guest

2019-06-20 Thread Jing Liu
AVX512 BFLOAT16 instructions support 16-bit BFLOAT16 floating-point
format (BF16) for deep learning optimization.

Intel adds AVX512 BFLOAT16 feature in CooperLake, which is CPUID.7.1.EAX[5].

Detailed information of the CPUID bit can be found here,
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf.

Signed-off-by: Jing Liu 
---
 arch/x86/kvm/cpuid.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e18a9f9..10be53f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -484,6 +484,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
entry->edx = 0;
break;
case 7: {
+   int i, times = entry->eax;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* Mask ebx against host capability word 9 */
if (index == 0) {
@@ -507,12 +508,23 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 
*entry, u32 function,
 * if the host doesn't support it.
 */
entry->edx |= F(ARCH_CAPABILITIES);
-   } else {
+   } else if (index > times) {
+   entry->eax = 0;
entry->ebx = 0;
entry->ecx = 0;
entry->edx = 0;
}
-   entry->eax = 0;
+   for (i = 1; i <= times; i++) {
+   if (*nent >= maxnent)
+   goto out;
+   do_cpuid_1_ent(&entry[i], function, i);
+   entry[i].eax &= F(AVX512_BF16);
+   entry[i].ebx = 0;
+   entry[i].ecx = 0;
+   entry[i].edx = 0;
+   entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+   ++*nent;
+   }
break;
}
case 9:
-- 
1.8.3.1



[PATCH RFC] kvm: x86: AVX512_BF16 feature support

2019-06-20 Thread Jing Liu
The patch focuses on a new instruction AVX512_BF16 support for kvm guest, 
defined
as CPUID.(EAX=7,ECX=1):EAX[bit 5], see spec[1].

The kvm implementation depends on kernel patch[2] which is in lkml discussion.

References:
[1] https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf
[2] https://lkml.org/lkml/2019/6/19/912

Jing Liu (1):
  kvm: x86: Expose AVX512_BF16 feature to guest

 arch/x86/kvm/cpuid.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

-- 
1.8.3.1