From: David Woodhouse <[email protected]> When in master clock mode, the KVM clock is defined in terms of the guest TSC. But get_kvmclock() was computing it from the host TSC without applying TSC scaling, leading to a systematic drift from the values the guest computes from its own TSC.
Store the VM's TSC scaling ratio in kvm_arch and precompute the guest-TSC-based mul/shift in pvclock_update_vm_gtod_copy(). Use these in get_kvmclock() to scale the host TSC delta to guest TSC before converting to nanoseconds. This avoids "definition C" of the KVM clock described in the earlier commit "KVM: x86/xen: Do not corrupt KVM clock in kvm_xen_shared_info_init()". Signed-off-by: David Woodhouse <[email protected]> --- arch/x86/include/asm/kvm_host.h | 4 +++ arch/x86/kvm/x86.c | 50 +++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 37264212c7df..5348fd5ea3f3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1490,6 +1490,7 @@ struct kvm_arch { u64 last_tsc_write; u32 last_tsc_khz; u64 last_tsc_offset; + u64 last_tsc_scaling_ratio; u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; @@ -1504,6 +1505,9 @@ struct kvm_arch { bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; + u64 master_tsc_scaling_ratio; + s8 master_tsc_shift; + u32 master_tsc_mul; #ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f2653eaccdf8..09b00906b1de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2781,6 +2781,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc, kvm->arch.last_tsc_write = tsc; kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; kvm->arch.last_tsc_offset = offset; + kvm->arch.last_tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; vcpu->arch.last_guest_tsc = tsc; @@ -3109,6 +3110,8 @@ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts, * */ +static unsigned long get_cpu_tsc_khz(void); + static void pvclock_update_vm_gtod_copy(struct kvm *kvm) { #ifdef CONFIG_X86_64 @@ -3132,9 +3135,28 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) && !ka->backwards_tsc_observed && 
!ka->boot_vcpu_runs_old_kvmclock; - if (ka->use_master_clock) + if (ka->use_master_clock) { + u64 tsc_hz; + atomic_set(&kvm_guest_has_master_clock, 1); + /* + * Copy the scaling ratio and precompute the mul/shift for + * converting guest TSC to nanoseconds. These are used by + * get_kvmclock() to compute kvmclock from the host TSC + * without needing a vCPU reference. + */ + ka->master_tsc_scaling_ratio = ka->last_tsc_scaling_ratio; + tsc_hz = (u64)get_cpu_tsc_khz() * 1000; + if (tsc_hz && kvm_caps.has_tsc_control) + tsc_hz = kvm_scale_tsc(tsc_hz, + ka->master_tsc_scaling_ratio); + if (tsc_hz) + kvm_get_time_scale(NSEC_PER_SEC, tsc_hz, + &ka->master_tsc_shift, + &ka->master_tsc_mul); + } + vclock_mode = pvclock_gtod_data.clock.vclock_mode; trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode, vcpus_matched); @@ -3235,10 +3257,28 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) data->flags |= KVM_CLOCK_TSC_STABLE; hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc); + + /* + * Use the precomputed guest-TSC-based mul/shift + * so that the kvmclock value matches what the + * guest computes from its own TSC. + */ + hv_clock.tsc_shift = ka->master_tsc_shift; + hv_clock.tsc_to_system_mul = ka->master_tsc_mul; + + if (kvm_caps.has_tsc_control) { + u64 tsc_delta = data->host_tsc - ka->master_cycle_now; + + tsc_delta = kvm_scale_tsc(tsc_delta, + ka->master_tsc_scaling_ratio); + data->clock = hv_clock.system_time + + pvclock_scale_delta(tsc_delta, + hv_clock.tsc_to_system_mul, + hv_clock.tsc_shift); + } else { + data->clock = __pvclock_read_cycles(&hv_clock, + data->host_tsc); + } put_cpu(); } else { -- 2.51.0

