We must also handle the reverse condition: the TSC can't go backwards
when trapping, and bad hardware offsetting can make this problem
visible when entering trapping mode.

This is accommodated by adding a 'bump' field to the computed TSC;
it's not pleasant, but it works.
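
To make the arithmetic concrete, here is a minimal userspace sketch of
the bump decay (illustrative only; read_tsc() and its globals are
hypothetical, not part of this patch):

  #include <stdio.h>
  #include <inttypes.h>

  static int64_t tsc_bump;      /* outstanding forward fudge, in cycles */
  static uint64_t last_base;    /* virtual TSC at the previous read */

  /*
   * Return a guest-visible TSC that never goes backwards: decay the
   * bump by the cycles elapsed since the last read, but guarantee at
   * least one cycle of forward progress while any bump remains.
   */
  static uint64_t read_tsc(uint64_t base)
  {
          if (tsc_bump) {
                  int64_t bump = tsc_bump - (int64_t)(base - last_base) + 1;

                  if (bump < 0)
                          bump = 0;       /* fully caught up again */
                  tsc_bump = bump;
          }
          last_base = base;
          return base + tsc_bump;
  }

  int main(void)
  {
          uint64_t base;

          tsc_bump = 1000;      /* guest was 1000 cycles ahead at trap time */
          for (base = 0; base <= 2000; base += 400)
                  printf("base=%4" PRIu64 " guest=%4" PRIu64 "\n",
                         base, read_tsc(base));
          return 0;
  }

While a bump is outstanding, each read advances the guest TSC by exactly
one cycle; once elapsed virtual time swallows the bump, the guest sees
the unadjusted virtual TSC again.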

Signed-off-by: Zachary Amsden <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |    2 +
 arch/x86/kvm/x86.c              |   58 +++++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64569b0..950537c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,6 +413,8 @@ struct kvm_arch {
        u32 virtual_tsc_khz;
        u32 virtual_tsc_mult;
        s8 virtual_tsc_shift;
+       s64 tsc_bump;
+       s64 last_tsc_bump_ns;
 
        struct kvm_xen_hvm_config xen_hvm_config;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33cb0f0..86f182a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -917,13 +917,48 @@ static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
 
 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
 {
+       struct kvm_arch *arch = &vcpu->kvm->arch;
        u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
-                                     vcpu->kvm->arch.virtual_tsc_mult,
-                                     vcpu->kvm->arch.virtual_tsc_shift);
+                                     arch->virtual_tsc_mult,
+                                     arch->virtual_tsc_shift);
        tsc += vcpu->arch.last_tsc_write;
+       if (unlikely(arch->tsc_bump)) {
+               s64 bump;
+
+               /*
+                * Ugh.  There was a TSC bump.  See how much time elapsed
+                * in cycles since last read, take it off the bump, but
+                * ensure TSC advances by at least one.  We're serialized
+                * by the TSC write lock until the bump is gone.
+                */
+               spin_lock(&arch->tsc_write_lock);
+               bump = pvclock_scale_delta(kernel_ns - arch->last_tsc_bump_ns,
+                                          arch->virtual_tsc_mult,
+                                          arch->virtual_tsc_shift);
+               bump = arch->tsc_bump - bump + 1;
+               if (bump < 0) {
+                       pr_debug("kvm: vcpu%d zeroed TSC bump\n", vcpu->vcpu_id);
+                       bump = 0;
+               }
+               arch->tsc_bump = bump;
+               arch->last_tsc_bump_ns = kernel_ns;
+               spin_unlock(&arch->tsc_write_lock);
+
+               tsc += bump;
+       }
        return tsc;
 }
 
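+/*
+ * Fold extra cycles into the guest-visible TSC.  The bump is applied
+ * on top of the computed virtual TSC so the guest never observes the
+ * TSC moving backwards when trapping begins; compute_guest_tsc()
+ * decays it again on subsequent reads.  Serialized by tsc_write_lock.
+ */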
+static void bump_guest_tsc(struct kvm_vcpu *vcpu, s64 bump, s64 kernel_ns)
+{
+       struct kvm *kvm = vcpu->kvm;
+       spin_lock(&kvm->arch.tsc_write_lock);
+       kvm->arch.tsc_bump += bump;
+       kvm->arch.last_tsc_bump_ns = kernel_ns;
+       spin_unlock(&kvm->arch.tsc_write_lock);
+       pr_debug("kvm: vcpu%d bumped TSC by %lld\n", vcpu->vcpu_id, bump);
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -996,7 +1031,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        void *shared_kaddr;
        unsigned long this_tsc_khz;
        s64 kernel_ns, max_kernel_ns;
-       u64 tsc_timestamp;
+       u64 tsc_timestamp, tsc;
        bool kvmclock = (vcpu->time_page != NULL);
        bool catchup = !kvmclock;
 
@@ -1035,7 +1070,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        }
 
        if (catchup) {
-               u64 tsc = compute_guest_tsc(v, kernel_ns);
+               tsc = compute_guest_tsc(v, kernel_ns);
                if (tsc > tsc_timestamp)
                        kvm_x86_ops->adjust_tsc_offset(v, tsc-tsc_timestamp);
        }
@@ -1048,8 +1083,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        if (!kvmclock) {
                /* Now, see if we need to switch into trap mode */
                if ((vcpu->tsc_mode == TSC_MODE_TRAP || vcpu->tsc_overrun) &&
-                   !vcpu->tsc_trapping)
+                   !vcpu->tsc_trapping) {
+                       /*
+                        * Check for the (hopefully) unlikely event of the
+                        * computed virtual TSC being before the TSC we were
+                        * passing through in hardware.  This can happen if
+                        * the kernel has miscomputed tsc_khz, if we missed an
+                        * overrun condition, or via bad SMP calibration.
+                        * If this is the case, we must add a bump to the
+                        * virtual TSC; this sucks.
+                        */
+                       if (unlikely(tsc < vcpu->last_guest_tsc))
+                               bump_guest_tsc(v, vcpu->last_guest_tsc - tsc,
+                                              kernel_ns);
                        kvm_x86_ops->set_tsc_trap(v, 1);
+               }
 
                /* If we're falling behind and not trapping, re-trigger */
                if (!vcpu->tsc_trapping &&
-- 
1.7.1
