Author: neel
Date: Wed Apr 10 05:59:07 2013
New Revision: 249324
URL: http://svnweb.freebsd.org/changeset/base/249324

Log:
  Unsynchronized TSCs on the host require special handling in bhyve:
  
  - use clock_gettime(2) as the time base for the emulated ACPI timer instead
    of directly using rdtsc().
  
  - don't advertise the invariant TSC capability to the guest to discourage it
    from using the TSC as its time base.
  
  Discussed with:       jhb@ (about making 'smp_tsc' a global)
  Reported by:  Dan Mack on freebsd-virtualization@
  Obtained from:        NetApp

Modified:
  head/sys/amd64/include/clock.h
  head/sys/amd64/vmm/x86.c
  head/sys/x86/x86/tsc.c
  head/usr.sbin/bhyve/pmtmr.c

Modified: head/sys/amd64/include/clock.h
==============================================================================
--- head/sys/amd64/include/clock.h      Wed Apr 10 02:40:03 2013        (r249323)
+++ head/sys/amd64/include/clock.h      Wed Apr 10 05:59:07 2013        (r249324)
@@ -20,6 +20,9 @@ extern int    i8254_max_count;
 extern uint64_t        tsc_freq;
 extern int     tsc_is_invariant;
 extern int     tsc_perf_stat;
+#ifdef SMP
+extern int     smp_tsc;
+#endif
 
 void   i8254_init(void);
 

Modified: head/sys/amd64/vmm/x86.c
==============================================================================
--- head/sys/amd64/vmm/x86.c    Wed Apr 10 02:40:03 2013        (r249323)
+++ head/sys/amd64/vmm/x86.c    Wed Apr 10 05:59:07 2013        (r249324)
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/cpuset.h>
 
+#include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
@@ -89,11 +90,27 @@ x86_emulate_cpuid(struct vm *vm, int vcp
                case CPUID_8000_0003:
                case CPUID_8000_0004:
                case CPUID_8000_0006:
-               case CPUID_8000_0007:
                case CPUID_8000_0008:
                        cpuid_count(*eax, *ecx, regs);
                        break;
 
+               case CPUID_8000_0007:
+                       cpuid_count(*eax, *ecx, regs);
+                       /*
+                        * If the host TSCs are not synchronized across
+                        * physical cpus then we cannot advertise an
+                        * invariant tsc to a vcpu.
+                        *
+                        * XXX This still falls short because the vcpu
+                        * can observe the TSC moving backwards as it
+                        * migrates across physical cpus. But at least
+                        * it should discourage the guest from using the
+                        * TSC to keep track of time.
+                        */
+                       if (!smp_tsc)
+                               regs[3] &= ~AMDPM_TSC_INVARIANT;
+                       break;
+
                case CPUID_0000_0001:
                        do_cpuid(1, regs);
 

Modified: head/sys/x86/x86/tsc.c
==============================================================================
--- head/sys/x86/x86/tsc.c      Wed Apr 10 02:40:03 2013        (r249323)
+++ head/sys/x86/x86/tsc.c      Wed Apr 10 05:59:07 2013        (r249324)
@@ -61,7 +61,7 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, 
 TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
 
 #ifdef SMP
-static int     smp_tsc;
+int    smp_tsc;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
     "Indicates whether the TSC is safe to use in SMP mode");
 TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);

Modified: head/usr.sbin/bhyve/pmtmr.c
==============================================================================
--- head/usr.sbin/bhyve/pmtmr.c Wed Apr 10 02:40:03 2013        (r249323)
+++ head/usr.sbin/bhyve/pmtmr.c Wed Apr 10 05:59:07 2013        (r249324)
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <time.h>
 #include <assert.h>
 #include <pthread.h>
@@ -53,35 +54,108 @@ __FBSDID("$FreeBSD$");
 #define PMTMR_FREQ     3579545  /* 3.579545MHz */
 
 static pthread_mutex_t pmtmr_mtx;
-static uint64_t        pmtmr_tscf;
+
 static uint64_t        pmtmr_old;
+
+static uint64_t        pmtmr_tscf;
 static uint64_t        pmtmr_tsc_old;
 
+static clockid_t clockid = CLOCK_UPTIME_FAST;
+static struct timespec pmtmr_uptime_old;
+
+#define        timespecsub(vvp, uvp)                                           \
+       do {                                                            \
+               (vvp)->tv_sec -= (uvp)->tv_sec;                         \
+               (vvp)->tv_nsec -= (uvp)->tv_nsec;                       \
+               if ((vvp)->tv_nsec < 0) {                               \
+                       (vvp)->tv_sec--;                                \
+                       (vvp)->tv_nsec += 1000000000;                   \
+               }                                                       \
+       } while (0)
+
+static uint64_t
+timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold)
+{
+       struct timespec tsdiff;
+       int64_t nsecs;
+
+       tsdiff = *tsnew;
+       timespecsub(&tsdiff, tsold);
+       nsecs = tsdiff.tv_sec * 1000000000 + tsdiff.tv_nsec;
+       assert(nsecs >= 0);
+
+       return (nsecs * PMTMR_FREQ / 1000000000 + pmtmr_old);
+}
+
+static uint64_t
+tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old)
+{
+
+       return ((tsc_new - tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old);
+}
+
+static void
+pmtmr_init(void)
+{
+       size_t len;
+       int smp_tsc, err;
+       struct timespec tsnew, tsold = { 0 };
+
+       len = sizeof(smp_tsc);
+       err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0);
+       assert(err == 0);
+
+       if (smp_tsc) {
+               len = sizeof(pmtmr_tscf);
+               err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
+                                  NULL, 0);
+               assert(err == 0);
+
+               pmtmr_tsc_old = rdtsc();
+               pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0);
+       } else {
+               if (getenv("BHYVE_PMTMR_PRECISE") != NULL)
+                       clockid = CLOCK_UPTIME;
+
+               err = clock_gettime(clockid, &tsnew);
+               assert(err == 0);
+
+               pmtmr_uptime_old = tsnew;
+               pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
+       }
+}
+
 static uint32_t
 pmtmr_val(void)
 {
+       struct timespec tsnew;
        uint64_t        pmtmr_tsc_new;
        uint64_t        pmtmr_new;
+       int             error;
+
        static int      inited = 0;
 
        if (!inited) {
-               size_t len;
-
-               inited = 1;
                pthread_mutex_init(&pmtmr_mtx, NULL);
-               len = sizeof(pmtmr_tscf);
-               sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
-                   NULL, 0);
-               pmtmr_tsc_old = rdtsc();
-               pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ;
+               pmtmr_init();
+               inited = 1;
        }
 
        pthread_mutex_lock(&pmtmr_mtx);
-       pmtmr_tsc_new = rdtsc();
-       pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf +
-           pmtmr_old;
+
+       if (pmtmr_tscf) {
+               pmtmr_tsc_new = rdtsc();
+               pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old);
+               pmtmr_tsc_old = pmtmr_tsc_new;
+       } else {
+               error = clock_gettime(clockid, &tsnew);
+               assert(error == 0);
+
+               pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old);
+               pmtmr_uptime_old = tsnew;
+       }
        pmtmr_old = pmtmr_new;
-       pmtmr_tsc_old = pmtmr_tsc_new;
+
        pthread_mutex_unlock(&pmtmr_mtx);
 
        return (pmtmr_new); 
@@ -102,4 +176,3 @@ pmtmr_handler(struct vmctx *ctx, int vcp
 }
 
 INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
-
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to