Re: [Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads

2020-03-26 Thread Tian, Kevin
> From: Jan Beulich 
> Sent: Tuesday, March 24, 2020 8:37 PM
> 
> If the hardware can handle accesses, we should allow it to do so. This
> way we can expose EFRO to HVM guests, and "all" that's left for exposing
> APERF/MPERF is to figure out how to handle writes to these MSRs. (Note
> that the leaf 6 guest CPUID checks will evaluate to false for now, as
> recalculate_misc() zaps the entire leaf for now.)
> 
> For TSC the intercepts are made mirror the RDTSC ones.
> 
> Signed-off-by: Jan Beulich 

Reviewed-by: Kevin Tian 


[Xen-devel] [PATCH v5 09/10] x86/HVM: don't needlessly intercept APERF/MPERF/TSC MSR reads

2020-03-24 Thread Jan Beulich
If the hardware can handle accesses, we should allow it to do so. This
way we can expose EFRO to HVM guests, and "all" that's left for exposing
APERF/MPERF is to figure out how to handle writes to these MSRs. (Note
that the leaf 6 guest CPUID checks will evaluate to false for now, as
recalculate_misc() zaps the entire leaf for now.)

For TSC the intercepts are made mirror the RDTSC ones.

Signed-off-by: Jan Beulich 
---
v4: Make TSC intercepts mirror RDTSC ones. Re-base.
v3: New.

--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -595,6 +595,7 @@ static void svm_cpuid_policy_changed(str
 struct vmcb_struct *vmcb = svm->vmcb;
 const struct cpuid_policy *cp = v->domain->arch.cpuid;
 u32 bitmap = vmcb_get_exception_intercepts(vmcb);
+unsigned int mode;
 
 if ( opt_hvm_fep ||
  (v->domain->arch.cpuid->x86_vendor != boot_cpu_data.x86_vendor) )
@@ -607,6 +608,17 @@ static void svm_cpuid_policy_changed(str
 /* Give access to MSR_PRED_CMD if the guest has been told about it. */
 svm_intercept_msr(v, MSR_PRED_CMD,
   cp->extd.ibpb ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW);
+
+/* Allow direct reads from APERF/MPERF if permitted by the policy. */
+mode = cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY
+   ? MSR_INTERCEPT_WRITE : MSR_INTERCEPT_RW;
+svm_intercept_msr(v, MSR_IA32_APERF, mode);
+svm_intercept_msr(v, MSR_IA32_MPERF, mode);
+
+/* Allow direct access to their r/o counterparts if permitted. */
+mode = cp->extd.efro ? MSR_INTERCEPT_NONE : MSR_INTERCEPT_RW;
+svm_intercept_msr(v, MSR_APERF_RD_ONLY, mode);
+svm_intercept_msr(v, MSR_MPERF_RD_ONLY, mode);
 }
 
 void svm_sync_vmcb(struct vcpu *v, enum vmcb_sync_state new_state)
@@ -860,7 +872,10 @@ static void svm_set_rdtsc_exiting(struct
 {
 general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
 general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
+svm_enable_intercept_for_msr(v, MSR_IA32_TSC);
 }
+else
+svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE);
 
 vmcb_set_general1_intercepts(vmcb, general1_intercepts);
 vmcb_set_general2_intercepts(vmcb, general2_intercepts);
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -108,6 +108,7 @@ static int construct_vmcb(struct vcpu *v
 {
 vmcb->_general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
 vmcb->_general2_intercepts |= GENERAL2_INTERCEPT_RDTSCP;
+svm_intercept_msr(v, MSR_IA32_TSC, MSR_INTERCEPT_WRITE);
 }
 
 /* Guest segment limits. */
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1141,8 +1141,13 @@ static int construct_vmcs(struct vcpu *v
 vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW);
 vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW);
 vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW);
+
+if ( !(v->arch.hvm.vmx.exec_control & CPU_BASED_RDTSC_EXITING) )
+vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
+
 if ( paging_mode_hap(d) && (!is_iommu_enabled(d) || iommu_snoop) )
 vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
+
 if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) &&
  (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) )
 vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -585,6 +585,18 @@ static void vmx_cpuid_policy_changed(str
 vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
 else
 vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
+
+/* Allow direct reads from APERF/MPERF if permitted by the policy. */
+if ( cp->basic.raw[6].c & CPUID6_ECX_APERFMPERF_CAPABILITY )
+{
+vmx_clear_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R);
+vmx_clear_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R);
+}
+else
+{
+vmx_set_msr_intercept(v, MSR_IA32_APERF, VMX_MSR_R);
+vmx_set_msr_intercept(v, MSR_IA32_MPERF, VMX_MSR_R);
+}
 }
 
 int vmx_guest_x86_mode(struct vcpu *v)
@@ -1250,7 +1262,12 @@ static void vmx_set_rdtsc_exiting(struct
 vmx_vmcs_enter(v);
 v->arch.hvm.vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
 if ( enable )
+{
 v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
+vmx_set_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
+}
+else
+vmx_clear_msr_intercept(v, MSR_IA32_TSC, VMX_MSR_R);
 vmx_update_cpu_exec_control(v);
 vmx_vmcs_exit(v);
 }
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -243,7 +243,7 @@ XEN_CPUFEATURE(ENQCMD,6*32+29) /
 
 /* AMD-defined CPU features, CPUID level 0x8007.edx, word 7 */
 XEN_CPUFEATURE(ITSC,  7*32+ 8) /*   Invariant TSC */
-XEN_CPUFEATURE(EFRO,  7*32+10) /*   APERF/MPERF Read Only interface */
+XEN_CPUFEATURE(EFRO,