Resync all CPUs periodically to measure TSC skew.  Use the measured skew
to adjust the resync interval (not done yet; a heuristic is still needed).
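
One possible shape for such a heuristic (a sketch only, not part of this
patch: next_resync_interval() is invented here, and the thresholds and
interval bounds are made-up numbers) would compare the measured tsc_skew
against the drift budget implied by tsc_drift, and scale the timer
period accordingly:

	static unsigned long next_resync_interval(u64 skew, u64 drift,
						  unsigned long elapsed)
	{
		unsigned long interval = HZ * 50;  /* current fixed period */
		u64 expected = drift * elapsed;    /* drift budget for the */

		do_div(expected, HZ);              /* elapsed jiffies */

		if (skew > 2 * expected)
			interval /= 2;             /* drifting fast: resync sooner */
		else if (skew < expected / 2)
			interval *= 2;             /* very stable: back off */

		if (interval < HZ * 10)
			interval = HZ * 10;
		if (interval > HZ * 600)
			interval = HZ * 600;
		return interval;
	}

resync() would then rearm resync_timer with mod_timer(&resync_timer,
jiffies + next_resync_interval(tsc_skew, tsc_drift, jif_delta)) instead
of the fixed HZ * 50.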

Signed-off-by: Zachary Amsden <[email protected]>
---
 arch/x86/kvm/x86.c |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 792c895..3a854ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -750,9 +750,10 @@ struct cpu_tsc_vars
        u64                     last_ref;
 };
 static DEFINE_PER_CPU(struct cpu_tsc_vars, cpu_tsc_vars);
-
 static int tsc_base_cpu = -1;
 static unsigned long ref_tsc_khz;
+static u64 tsc_drift;
+static struct timer_list resync_timer;
 
 static inline int cpu_is_tsc_synchronized(int cpu)
 {
@@ -935,6 +936,7 @@ static void sync_tsc_helper(int measure_cpu, s64 *delta, atomic_t *ready)
  * Average and trim the samples of any outliers; we use > 2 x sigma
  */
 static u64 tsc_deviation;
+static u64 tsc_skew;
 static s64 average_samples(s64 *samples, unsigned num_samples)
 {
        unsigned i, j;
@@ -993,10 +995,24 @@ static void kvm_sync_tsc(void *cpup)
        s64 *delta1, *delta2;
        static atomic_t ready ____cacheline_aligned = ATOMIC_INIT(1);
        struct cpu_tsc_vars *cv = &per_cpu(cpu_tsc_vars, new_cpu);
+       static u64 old_base;
+       static s64 old_offset;
+       static unsigned long old_multiplier;
+       static unsigned int old_shift;
 
        BUG_ON(tsc_base_cpu == -1);
        local_irq_save(flags);
+
+       /*
+        * First, the new CPU may be just coming up to sync or might have
+        * changed frequency, which means the measurement base must be
+        * adjusted.  If not, we can use it to compute a skew estimate.
+        */
        if (raw_smp_processor_id() == new_cpu) {
+               old_multiplier = cv->tsc_multiplier;
+               old_shift = cv->tsc_shift;
+               old_base = cv->tsc_measure_base;
+               old_offset = cv->tsc_offset;
                cv->tsc_measure_base = native_read_tsc();
                cv->tsc_offset = 0;
                compute_best_multiplier(ref_tsc_khz, cv->tsc_khz,
@@ -1005,10 +1021,12 @@ static void kvm_sync_tsc(void *cpup)
                         " tsc_base_cpu = %d\n", __func__, new_cpu, cv->tsc_khz,
                         cv->tsc_measure_base, tsc_base_cpu);
        }
+
        delta1 = per_cpu(delta_array, tsc_base_cpu).delta;
        delta2 = per_cpu(delta_array, new_cpu).delta;
        sync_tsc_helper(tsc_base_cpu, delta1, &ready);
        sync_tsc_helper(new_cpu, delta2, &ready);
+
        if (raw_smp_processor_id() == new_cpu) {
                s64 accumulator = 0;
 
@@ -1024,8 +1042,40 @@ static void kvm_sync_tsc(void *cpup)
                accumulator += average_samples(&delta1[2], SYNC_TRIES-3);
                accumulator -= average_samples(&delta2[2], SYNC_TRIES-3);
                accumulator /= 2;
-
                cv->tsc_offset = accumulator;
+
+               /*
+                * Skew can be computed over a constant multiplier as follows:
+                *
+                * ref_new = (tsc_new - base_new) * mult + off_new
+                * ref_old = (tsc_old - base_old) * mult + off_old
+                *
+                * skew = ref_new - (ref_old + delta_ref)
+                *
+                * skew = off_new - off_old + mult(tsc_new - tsc_old)
+                *                - mult(base_new - base_old) - delta_ref
+                *
+                * The tsc_old / tsc_new values are not recoverable, but
+                * observe that mult(tsc_new - tsc_old) == delta_ref, so
+                *
+                *    skew = delta(off) - mult(delta base)
+                *
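+                * As a worked example (all numbers invented purely for
+                * illustration): if mult(base) moves from 1000 to 1500
+                * while the offset moves from 40 to 100, then
+                * skew = (100 - 40) + (1000 - 1500) = -440, i.e. this
+                * CPU's clock fell 440 reference cycles behind.
+                *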
+                * To avoid problems with signed computation, we do the
+                * multiplications on unsigned values before switching to
+                * signed arithmetic.
+                */
+               if (old_multiplier == cv->tsc_multiplier &&
+                   old_shift == cv->tsc_shift) {
+                       u64 sbo = old_base, sbn = cv->tsc_measure_base;
+                       s64 skew;
+                       sbo = mult_precise(sbo, old_multiplier, old_shift);
+                       sbn = mult_precise(sbn, old_multiplier, old_shift);
+                       skew = cv->tsc_offset - old_offset + (sbo - sbn);
+                       if (skew < 0)
+                               skew = -skew;
+                       if (skew > tsc_skew)
+                               tsc_skew = skew;
+               }
+
                smp_wmb();
                ++cv->tsc_generation;
                atomic_set(&cv->tsc_synchronized, 1);
@@ -3611,6 +3661,8 @@ static long resync(void *unused)
        struct cpu_tsc_vars *cv = &__get_cpu_var(cpu_tsc_vars);
        u64 tsc = 0;
        int cpu;
+       static unsigned long jif_old;
+       unsigned long jif_delta;
 
        /*
         * First, make sure we are on the right CPU; between when the work got
@@ -3643,17 +3695,28 @@ static long resync(void *unused)
        cv->tsc_generation++;   /* XXX needed? */
        compute_best_multiplier(ref_tsc_khz, cv->tsc_khz, &cv->tsc_multiplier,
                                &cv->tsc_shift);
+       tsc_skew = 0;
        atomic_set(&cv->tsc_synchronized, 1);
+       smp_wmb();
 
        for_each_online_cpu(cpu)
                kvm_do_sync_tsc(cpu);
 
+       for_each_online_cpu(cpu)
+               while (!cpu_is_tsc_synchronized(cpu))
+                       cpu_relax();
+
+       smp_rmb();
+       jif_delta = jiffies - jif_old;
+       pr_debug("max TSC skew now estimated at %llu over %lu jiffies\n",
+                tsc_skew, jif_delta);
+       jif_old = jiffies;
+       mod_timer(&resync_timer, jiffies + HZ * 50);
        put_cpu();
        return 0;
 }
 
 static DEFINE_MUTEX(resync_lock);
-
 static void resync_all(void)
 {
        mutex_lock(&resync_lock);
@@ -3662,6 +3725,18 @@ static void resync_all(void)
        mutex_unlock(&resync_lock);
 }
 
+static struct work_struct resync_work;
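+
+/*
+ * resync_all() takes resync_lock (a mutex) and waits for remote CPUs,
+ * so it cannot run from the timer callback, which executes in softirq
+ * context; the callback therefore only queues work to process context.
+ */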
+static void resync_work_fn(struct work_struct *work)
+{
+       resync_all();
+}
+
+static void resync_callout(unsigned long unused)
+{
+       INIT_WORK(&resync_work, resync_work_fn);
+       schedule_work(&resync_work);
+}
+
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                     void *data)
 {
@@ -3836,6 +3911,15 @@ static void kvm_timer_init(void)
                for_each_possible_cpu(cpu)
                        per_cpu(cpu_tsc_vars, cpu).tsc_khz = tsc_khz;
        }
+
+       /*
+        * Now, pick a CPU to make the master and synchronize all other
+        * CPUs to its clock.  Periodically check for drift as well.
+        * Our initial drift estimate is 1 ppm / sec.
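+        * (1 ppm of ref_tsc_khz * 1000 Hz works out to ref_tsc_khz / 1000
+        * cycles per second, e.g. 3000 cycles/sec for a 3 GHz TSC.)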
+        */
+       tsc_drift = ref_tsc_khz / 1000;
+       init_timer(&resync_timer);
+       resync_timer.function = resync_callout;
        tsc_base_cpu = get_cpu();
        put_cpu();
        resync_all();
@@ -3898,6 +3982,9 @@ void kvm_arch_exit(void)
                        pci_write_config_byte(*nb, 0x87, disabled_c1_ramp);
        }
 #endif
+       mutex_lock(&resync_lock);
+       del_timer(&resync_timer);
+       mutex_unlock(&resync_lock);
 }
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-- 
1.6.5.2
