From: David Woodhouse <[email protected]>

Restructure kvm_guest_time_update() so that kernel_ns and host_tsc always
hold the current ("now") values while the TSC catchup is computed, and only
afterward swap in the master clock reference values that are used for the
hv_clock.

This makes the TSC upscaling code considerably simpler: the catchup adjustment is computed as the delta between what the guest TSC *should* be at "now" and what it actually is, rather than mixing "now" and "master clock reference" timestamps. The seqcount loop now also contains the kvm_get_time_and_clockread() call (matching get_kvmclock's pattern), with the same WARN for unexpected failure. Based on a suggestion by Sean Christopherson. Signed-off-by: David Woodhouse <[email protected]> --- arch/x86/kvm/x86.c | 74 +++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8aae22401046..92e32d720523 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3363,46 +3363,63 @@ static void kvm_setup_guest_pvclock(struct pvclock_vcpu_time_info *ref_hv_clock, int kvm_guest_time_update(struct kvm_vcpu *v) { struct pvclock_vcpu_time_info hv_clock = {}; - unsigned long flags; u64 tgt_tsc_hz; unsigned seq; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; u64 tsc_timestamp, host_tsc; + u64 master_host_tsc = 0; + s64 master_kernel_ns = 0; bool use_master_clock; - kernel_ns = 0; - host_tsc = 0; - /* * If the host uses TSC clock, then passthrough TSC as stable * to the guest. */ do { seq = read_seqcount_begin(&ka->pvclock_sc); + use_master_clock = ka->use_master_clock; - if (use_master_clock) { - host_tsc = ka->master_cycle_now; - kernel_ns = ka->master_kernel_ns; - } + + /* + * The TSC read and the call to get_cpu_tsc_khz() must happen + * on the same CPU. 
+ */ + get_cpu(); + + tgt_tsc_hz = (u64)get_cpu_tsc_khz() * 1000; + +#ifdef CONFIG_X86_64 + if (use_master_clock && + !kvm_get_time_and_clockread(&kernel_ns, &host_tsc) && + !read_seqcount_retry(&ka->pvclock_sc, seq)) + use_master_clock = false; +#endif + + put_cpu(); + + if (!use_master_clock) + break; + + master_host_tsc = ka->master_cycle_now; + master_kernel_ns = ka->master_kernel_ns; } while (read_seqcount_retry(&ka->pvclock_sc, seq)); - /* Keep irq disabled to prevent changes to the clock */ - local_irq_save(flags); - tgt_tsc_hz = (u64)get_cpu_tsc_khz() * 1000; if (unlikely(tgt_tsc_hz == 0)) { - local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); return 1; } + if (!use_master_clock) { + unsigned long flags; + + local_irq_save(flags); host_tsc = rdtsc(); kernel_ns = get_kvmclock_base_ns(); + local_irq_restore(flags); } - tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); - /* * We may have to catch up the TSC to match elapsed wall clock * time for two reasons, even if kvmclock is used. @@ -3411,17 +3428,32 @@ int kvm_guest_time_update(struct kvm_vcpu *v) * entry to avoid unknown leaps of TSC even when running * again on the same CPU. This may cause apparent elapsed * time to disappear, and the guest to stand still or run - * very slowly. + * very slowly. */ if (vcpu->tsc_catchup) { - u64 tsc = compute_guest_tsc(v, kernel_ns); - if (tsc > tsc_timestamp) { - adjust_tsc_offset_guest(v, tsc - tsc_timestamp); - tsc_timestamp = tsc; - } + s64 adjustment; + + /* + * Calculate the delta between what the guest TSC *should* be + * and what it actually is according to kvm_read_l1_tsc(). + */ + adjustment = compute_guest_tsc(v, kernel_ns) - + kvm_read_l1_tsc(v, host_tsc); + if (adjustment > 0) + adjust_tsc_offset_guest(v, adjustment); } - local_irq_restore(flags); + /* + * Now that TSC upscaling is out of the way, the remaining calculations + * are all relative to the reference time that's placed in hv_clock. 
+ * If the master clock is NOT in use, the reference time is "now". If + * master clock is in use, the reference time comes from there. + */ + if (use_master_clock) { + host_tsc = master_host_tsc; + kernel_ns = master_kernel_ns; + } + tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); /* With all the info we got, fill in the values */ -- 2.54.0

