Author: grehan
Date: Thu Sep 10 10:49:59 2020
New Revision: 365560
URL: https://svnweb.freebsd.org/changeset/base/365560

Log:
  MFC r364340, r364343, r364656
  
    r364340    Support guest rdtscp and rdpid instructions on Intel VT-x
  
    Follow-on commits:
    r364343    Export a routine to provide the TSC_AUX MSR value and use this in vmm
    r364656    assert caller is preventing CPU migration
  
    Submitted by:       adam_fenn.io
    Differential Revision: https://reviews.freebsd.org/D26003

Modified:
  stable/12/sys/amd64/amd64/initcpu.c
  stable/12/sys/amd64/include/vmm.h
  stable/12/sys/amd64/vmm/intel/vmx.c
  stable/12/sys/amd64/vmm/intel/vmx.h
  stable/12/sys/amd64/vmm/intel/vmx_msr.c
  stable/12/sys/amd64/vmm/intel/vmx_msr.h
  stable/12/sys/amd64/vmm/x86.c
  stable/12/sys/i386/i386/initcpu.c
  stable/12/sys/x86/include/x86_var.h
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/12/sys/amd64/amd64/initcpu.c Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/amd64/initcpu.c Thu Sep 10 10:49:59 2020        (r365560)
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 
 #include <vm/vm.h>
@@ -218,6 +219,18 @@ init_via(void)
 }
 
 /*
+ * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU.
+ *
+ * Caller should prevent CPU migration.
+ */
+u_int
+cpu_auxmsr(void)
+{
+       KASSERT((read_rflags() & PSL_I) == 0, ("context switch possible"));
+       return (PCPU_GET(cpuid));
+}
+
+/*
  * Initialize CPU control registers
  */
 void
@@ -283,7 +296,7 @@ initializecpu(void)
 
        if ((amd_feature & AMDID_RDTSCP) != 0 ||
            (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
-               wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
+               wrmsr(MSR_TSC_AUX, cpu_auxmsr());
 }
 
 void

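A minimal sketch, not part of the commit, of how a new caller could use cpu_auxmsr(): the KASSERT above requires interrupts to be disabled, which also satisfies the "caller should prevent CPU migration" contract. The helper name reload_tsc_aux() is hypothetical; cpu_auxmsr() is declared in x86/x86_var.h (pulled in via machine/md_var.h).

#include <sys/param.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>	/* cpu_auxmsr() via x86/x86_var.h */
#include <machine/specialreg.h>

static void
reload_tsc_aux(void)
{
	register_t saved;

	/*
	 * Disabling interrupts satisfies the KASSERT in cpu_auxmsr() and
	 * keeps this thread on the current CPU across the wrmsr().
	 */
	saved = intr_disable();
	wrmsr(MSR_TSC_AUX, cpu_auxmsr());
	intr_restore(saved);
}
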
Modified: stable/12/sys/amd64/include/vmm.h
==============================================================================
--- stable/12/sys/amd64/include/vmm.h   Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/include/vmm.h   Thu Sep 10 10:49:59 2020        (r365560)
@@ -436,6 +436,8 @@ enum vm_cap_type {
        VM_CAP_UNRESTRICTED_GUEST,
        VM_CAP_ENABLE_INVPCID,
        VM_CAP_BPT_EXIT,
+       VM_CAP_RDPID,
+       VM_CAP_RDTSCP,
        VM_CAP_MAX
 };
 

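The two new constants are also queryable from userland. A hedged sketch, assuming the stable/12 libvmmapi signature vm_get_capability(struct vmctx *, int vcpu, enum vm_cap_type, int *); the helper name report_rdpid_rdtscp() is hypothetical.

#include <stdio.h>

#include <machine/vmm.h>

#include <vmmapi.h>

static void
report_rdpid_rdtscp(struct vmctx *ctx, int vcpu)
{
	int val;

	if (vm_get_capability(ctx, vcpu, VM_CAP_RDPID, &val) == 0)
		printf("RDPID pass-through:  %s\n", val ? "enabled" : "disabled");
	if (vm_get_capability(ctx, vcpu, VM_CAP_RDTSCP, &val) == 0)
		printf("RDTSCP pass-through: %s\n", val ? "enabled" : "disabled");
}

On the VT-x side these capabilities are report-only: vmx_setcap() below rejects attempts to toggle them.
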
Modified: stable/12/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx.c Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx.c Thu Sep 10 10:49:59 2020        (r365560)
@@ -160,6 +160,14 @@ static int cap_pause_exit;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, pause_exit, CTLFLAG_RD, &cap_pause_exit,
     0, "PAUSE triggers a VM-exit");
 
+static int cap_rdpid;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdpid, CTLFLAG_RD, &cap_rdpid, 0,
+    "Guests are allowed to use RDPID");
+
+static int cap_rdtscp;
+SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, rdtscp, CTLFLAG_RD, &cap_rdtscp, 0,
+    "Guests are allowed to use RDTSCP");
+
 static int cap_unrestricted_guest;
 SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, unrestricted_guest, CTLFLAG_RD,
     &cap_unrestricted_guest, 0, "Unrestricted guests");
@@ -293,6 +301,18 @@ static int vmx_getreg(void *arg, int vcpu, int reg, ui
 static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
 static void vmx_inject_pir(struct vlapic *vlapic);
 
+static inline bool
+host_has_rdpid(void)
+{
+       return ((cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0);
+}
+
+static inline bool
+host_has_rdtscp(void)
+{
+       return ((amd_feature & AMDID_RDTSCP) != 0);
+}
+
 #ifdef KTR
 static const char *
 exit_reason_to_str(int reason)
@@ -745,6 +765,43 @@ vmx_init(int ipinum)
                                         PROCBASED_PAUSE_EXITING, 0,
                                         &tmp) == 0);
 
+       /*
+        * Check support for RDPID and/or RDTSCP.
+        *
+        * Support a pass-through-based implementation of these via the
+        * "enable RDTSCP" VM-execution control and the "RDTSC exiting"
+        * VM-execution control.
+        *
+        * The "enable RDTSCP" VM-execution control applies to both RDPID
+        * and RDTSCP (see SDM volume 3, section 25.3, "Changes to
+        * Instruction Behavior in VMX Non-root operation"); this is why
+        * only this VM-execution control needs to be enabled in order to
+        * enable passing through whichever of RDPID and/or RDTSCP are
+        * supported by the host.
+        *
+        * The "RDTSC exiting" VM-execution control applies to both RDTSC
+        * and RDTSCP (again, per SDM volume 3, section 25.3), and is
+        * already set up for RDTSC and RDTSCP pass-through by the current
+        * implementation of RDTSC.
+        *
+        * Although RDPID and RDTSCP are optional capabilities, since there
+        * does not currently seem to be a use case for enabling/disabling
+        * these via libvmmapi, choose not to support this and, instead,
+        * just statically always enable or always disable this support
+        * across all vCPUs on all VMs. (Note that there may be some
+        * complications to providing this functionality, e.g., the MSR
+        * bitmap is currently per-VM rather than per-vCPU while the
+        * capability API wants to be able to control capabilities on a
+        * per-vCPU basis).
+        */
+       error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
+                              MSR_VMX_PROCBASED_CTLS2,
+                              PROCBASED2_ENABLE_RDTSCP, 0, &tmp);
+       cap_rdpid = error == 0 && host_has_rdpid();
+       cap_rdtscp = error == 0 && host_has_rdtscp();
+       if (cap_rdpid || cap_rdtscp)
+               procbased_ctls2 |= PROCBASED2_ENABLE_RDTSCP;
+
        cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
                                        MSR_VMX_PROCBASED_CTLS2,
                                        PROCBASED2_UNRESTRICTED_GUEST, 0,
@@ -997,6 +1054,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
         * the "use TSC offsetting" execution control is enabled and the
         * difference between the host TSC and the guest TSC is written
         * into the TSC offset in the VMCS.
+        *
+        * Guest TSC_AUX support is enabled if any of guest RDPID and/or
+        * guest RDTSCP support are enabled (since, as per Table 2-2 in SDM
+        * volume 4, TSC_AUX is supported if any of RDPID and/or RDTSCP are
+        * supported). If guest TSC_AUX support is enabled, TSC_AUX is
+        * exposed read-only so that the VMM can do one fewer MSR read per
+        * exit than if this register were exposed read-write; the guest
+        * restore value can be updated during guest writes (expected to be
+        * rare) instead of during all exits (common).
         */
        if (guest_msr_rw(vmx, MSR_GSBASE) ||
            guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -1004,7 +1070,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
            guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
            guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
            guest_msr_rw(vmx, MSR_EFER) ||
-           guest_msr_ro(vmx, MSR_TSC))
+           guest_msr_ro(vmx, MSR_TSC) ||
+           ((cap_rdpid || cap_rdtscp) && guest_msr_ro(vmx, MSR_TSC_AUX)))
                panic("vmx_vminit: error setting guest msr access");
 
        vpid_alloc(vpid, VM_MAXCPU);
@@ -1083,6 +1150,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
                KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
 
                vmx->cap[i].set = 0;
+               vmx->cap[i].set |= cap_rdpid != 0 ? 1 << VM_CAP_RDPID : 0;
+               vmx->cap[i].set |= cap_rdtscp != 0 ? 1 << VM_CAP_RDTSCP : 0;
                vmx->cap[i].proc_ctls = procbased_ctls;
                vmx->cap[i].proc_ctls2 = procbased_ctls2;
                vmx->cap[i].exc_bitmap = exc_bitmap;
@@ -2989,11 +3058,30 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pm
                sidt(&idtr);
                ldt_sel = sldt();
 
+               /*
+                * The TSC_AUX MSR must be saved/restored while interrupts
+                * are disabled so that it is not possible for the guest
+                * TSC_AUX MSR value to be overwritten by the resume
+                * portion of the IPI_SUSPEND codepath. This is why the
+                * transition of this MSR is handled separately from those
+                * handled by vmx_msr_guest_{enter,exit}(), which are ok to
+                * be transitioned with preemption disabled but interrupts
+                * enabled.
+                *
+                * These vmx_msr_guest_{enter,exit}_tsc_aux() calls can be
+                * anywhere in this loop so long as they happen with
+                * interrupts disabled. This location is chosen for
+                * simplicity.
+                */
+               vmx_msr_guest_enter_tsc_aux(vmx, vcpu);
+
                vmx_run_trace(vmx, vcpu);
                vmx_dr_enter_guest(vmxctx);
                rc = vmx_enter_guest(vmxctx, vmx, launched);
                vmx_dr_leave_guest(vmxctx);
 
+               vmx_msr_guest_exit_tsc_aux(vmx, vcpu);
+
                bare_lgdt(&gdtr);
                lidt(&idtr);
                lldt(ldt_sel);
@@ -3331,6 +3419,14 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
                if (cap_monitor_trap)
                        ret = 0;
                break;
+       case VM_CAP_RDPID:
+               if (cap_rdpid)
+                       ret = 0;
+               break;
+       case VM_CAP_RDTSCP:
+               if (cap_rdtscp)
+                       ret = 0;
+               break;
        case VM_CAP_UNRESTRICTED_GUEST:
                if (cap_unrestricted_guest)
                        ret = 0;
@@ -3394,6 +3490,17 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
                        flag = PROCBASED_PAUSE_EXITING;
                        reg = VMCS_PRI_PROC_BASED_CTLS;
                }
+               break;
+       case VM_CAP_RDPID:
+       case VM_CAP_RDTSCP:
+               if (cap_rdpid || cap_rdtscp)
+                       /*
+                        * Choose not to support enabling/disabling
+                        * RDPID/RDTSCP via libvmmapi since, as per the
+                        * discussion in vmx_init(), RDPID/RDTSCP are
+                        * either always enabled or always disabled.
+                        */
+                       error = EOPNOTSUPP;
                break;
        case VM_CAP_UNRESTRICTED_GUEST:
                if (cap_unrestricted_guest) {

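For reference, a sketch of what the pass-through exposes inside the guest (not part of the commit; the rdpid mnemonic needs a reasonably new assembler). RDTSCP returns the TSC in EDX:EAX and IA32_TSC_AUX in ECX, while RDPID reads IA32_TSC_AUX alone; a guest kernel that seeds TSC_AUX with its CPU id (FreeBSD's initializecpu() does exactly that) gets a cheap CPU-number hint from either instruction.

#include <sys/cdefs.h>
#include <sys/types.h>

static __inline uint64_t
guest_rdtscp(uint32_t *aux)
{
	uint32_t lo, hi;

	__asm __volatile("rdtscp" : "=a" (lo), "=d" (hi), "=c" (*aux));
	return ((uint64_t)hi << 32 | lo);
}

static __inline uint64_t
guest_rdpid(void)
{
	uint64_t aux;

	__asm __volatile("rdpid %0" : "=r" (aux));
	return (aux);
}
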
Modified: stable/12/sys/amd64/vmm/intel/vmx.h
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx.h Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx.h Thu Sep 10 10:49:59 2020        (r365560)
@@ -117,6 +117,7 @@ enum {
        IDX_MSR_SF_MASK,
        IDX_MSR_KGSBASE,
        IDX_MSR_PAT,
+       IDX_MSR_TSC_AUX,
        GUEST_MSR_NUM           /* must be the last enumeration */
 };
 
@@ -152,5 +153,19 @@ int        vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint
 
 extern char    vmx_exit_guest[];
 extern char    vmx_exit_guest_flush_rsb[];
+
+static inline bool
+vmx_have_msr_tsc_aux(struct vmx *vmx)
+{
+       int rdpid_rdtscp_bits = ((1 << VM_CAP_RDPID) | (1 << VM_CAP_RDTSCP));
+
+       /*
+        * Since the values of these bits are uniform across all vCPUs
+        * (see discussion in vmx_init() and initialization of these bits
+        * in vmx_vminit()), just always use vCPU-zero's capability set and
+        * remove the need to require a vcpuid argument.
+        */
+       return ((vmx->cap[0].set & rdpid_rdtscp_bits) != 0);
+}
 
 #endif

Modified: stable/12/sys/amd64/vmm/intel/vmx_msr.c
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx_msr.c     Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx_msr.c     Thu Sep 10 10:49:59 2020        (r365560)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 
 #include "vmx.h"
 #include "vmx_msr.h"
+#include "x86.h"
 
 static bool
 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
@@ -361,6 +362,16 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
 }
 
 void
+vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid)
+{
+       uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+       uint32_t host_aux = cpu_auxmsr();
+
+       if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux)
+               wrmsr(MSR_TSC_AUX, guest_tsc_aux);
+}
+
+void
 vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
 {
        uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
@@ -381,6 +392,23 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
        /* MSR_KGSBASE will be restored on the way back to userspace */
 }
 
+void
+vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid)
+{
+       uint64_t guest_tsc_aux = vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX];
+       uint32_t host_aux = cpu_auxmsr();
+
+       if (vmx_have_msr_tsc_aux(vmx) && guest_tsc_aux != host_aux)
+               /*
+                * Note that it is not necessary to save the guest value
+                * here; vmx->guest_msrs[vcpuid][IDX_MSR_TSC_AUX] always
+                * contains the current value since it is updated whenever
+                * the guest writes to it (which is expected to be very
+                * rare).
+                */
+               wrmsr(MSR_TSC_AUX, host_aux);
+}
+
 int
 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
 {
@@ -472,6 +500,17 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint
                break;
        case MSR_TSC:
                error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
+               break;
+       case MSR_TSC_AUX:
+               if (vmx_have_msr_tsc_aux(vmx))
+                       /*
+                        * vmx_msr_guest_enter_tsc_aux() will apply this
+                        * value when it is called immediately before guest
+                        * entry.
+                        */
+                       guest_msrs[IDX_MSR_TSC_AUX] = val;
+               else
+                       vm_inject_gp(vmx->vm, vcpuid);
                break;
        default:
                error = EINVAL;

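A sketch of the guest-side write path this serves (hypothetical guest kernel code): the MSR bitmap set up in vmx_vminit() leaves TSC_AUX reads pass-through, while guest writes exit to the MSR_TSC_AUX case above, are cached in guest_msrs[], and are loaded into the hardware MSR by vmx_msr_guest_enter_tsc_aux() before the vCPU next runs.

#include <sys/param.h>
#include <sys/pcpu.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>

static void
guest_seed_tsc_aux(void)
{
	/*
	 * This wrmsr traps to the hypervisor and is cached there;
	 * subsequent rdtscp/rdpid/rdmsr of TSC_AUX in the guest read the
	 * value straight from hardware once the vCPU re-enters.
	 */
	wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
}
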
Modified: stable/12/sys/amd64/vmm/intel/vmx_msr.h
==============================================================================
--- stable/12/sys/amd64/vmm/intel/vmx_msr.h     Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/vmm/intel/vmx_msr.h     Thu Sep 10 10:49:59 2020        (r365560)
@@ -35,8 +35,10 @@ struct vmx;
 
 void vmx_msr_init(void);
 void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, int vcpuid);
 void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
 void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, int vcpuid);
 int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
 int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
 

Modified: stable/12/sys/amd64/vmm/x86.c
==============================================================================
--- stable/12/sys/amd64/vmm/x86.c       Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/amd64/vmm/x86.c       Thu Sep 10 10:49:59 2020        (r365560)
@@ -91,7 +91,8 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 {
        const struct xsave_limits *limits;
        uint64_t cr4;
-       int error, enable_invpcid, level, width, x2apic_id;
+       int error, enable_invpcid, enable_rdpid, enable_rdtscp, level,
+           width, x2apic_id;
        unsigned int func, regs[4], logical_cpus;
        enum x2apic_state x2apic_state;
        uint16_t cores, maxcpus, sockets, threads;
@@ -194,11 +195,13 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
                        /* Hide mwaitx/monitorx capability from the guest */
                        regs[2] &= ~AMDID2_MWAITX;
 
-                       /*
-                        * Hide rdtscp/ia32_tsc_aux until we know how
-                        * to deal with them.
-                        */
-                       regs[3] &= ~AMDID_RDTSCP;
+                       /* Advertise RDTSCP if it is enabled. */
+                       error = vm_get_capability(vm, vcpu_id,
+                           VM_CAP_RDTSCP, &enable_rdtscp);
+                       if (error == 0 && enable_rdtscp)
+                               regs[3] |= AMDID_RDTSCP;
+                       else
+                               regs[3] &= ~AMDID_RDTSCP;
                        break;
 
                case CPUID_8000_0007:
@@ -441,6 +444,12 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
                                    CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA);
                                regs[2] = 0;
                                regs[3] &= CPUID_STDEXT3_MD_CLEAR;
+
+                               /* Advertise RDPID if it is enabled. */
+                               error = vm_get_capability(vm, vcpu_id,
+                                   VM_CAP_RDPID, &enable_rdpid);
+                               if (error == 0 && enable_rdpid)
+                                       regs[2] |= CPUID_STDEXT2_RDPID;
 
                                /* Advertise INVPCID if it is enabled. */
                                error = vm_get_capability(vm, vcpu_id,

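A sketch (hypothetical helper names) of how a guest detects what x86_emulate_cpuid() now advertises: RDTSCP is CPUID.80000001H:EDX bit 27 (AMDID_RDTSCP) and RDPID is CPUID.(EAX=07H,ECX=0):ECX bit 22 (CPUID_STDEXT2_RDPID). The sketch assumes extended leaf 0x80000001 is present.

#include <sys/param.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>

static bool
guest_sees_rdtscp(void)
{
	u_int regs[4];

	do_cpuid(0x80000001, regs);
	return ((regs[3] & AMDID_RDTSCP) != 0);
}

static bool
guest_sees_rdpid(void)
{
	u_int regs[4];

	cpuid_count(0x7, 0, regs);
	return ((regs[2] & CPUID_STDEXT2_RDPID) != 0);
}
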
Modified: stable/12/sys/i386/i386/initcpu.c
==============================================================================
--- stable/12/sys/i386/i386/initcpu.c   Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/i386/i386/initcpu.c   Thu Sep 10 10:49:59 2020        (r365560)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 
 #include <vm/vm.h>
@@ -627,6 +628,18 @@ init_transmeta(void)
 }
 #endif
 
+/*
+ * The value for the TSC_AUX MSR and rdtscp/rdpid on the invoking CPU.
+ *
+ * Caller should prevent CPU migration.
+ */
+u_int
+cpu_auxmsr(void)
+{
+       KASSERT((read_eflags() & PSL_I) == 0, ("context switch possible"));
+       return (PCPU_GET(cpuid));
+}
+
 extern int elf32_nxstack;
 
 void
@@ -756,7 +769,7 @@ initializecpu(void)
 #endif
        if ((amd_feature & AMDID_RDTSCP) != 0 ||
            (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
-               wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
+               wrmsr(MSR_TSC_AUX, cpu_auxmsr());
 }
 
 void

Modified: stable/12/sys/x86/include/x86_var.h
==============================================================================
--- stable/12/sys/x86/include/x86_var.h Thu Sep 10 09:50:43 2020        (r365559)
+++ stable/12/sys/x86/include/x86_var.h Thu Sep 10 10:49:59 2020        (r365560)
@@ -123,6 +123,7 @@ cpu_getmaxphyaddr(void)
 bool   acpi_get_fadt_bootflags(uint16_t *flagsp);
 void   *alloc_fpusave(int flags);
 void   busdma_swi(void);
+u_int  cpu_auxmsr(void);
 bool   cpu_mwait_usable(void);
 void   cpu_probe_amdc1e(void);
 void   cpu_setregs(void);