From: David Woodhouse <[email protected]> Remove the now-unused KVM-private timekeeping infrastructure:
- struct pvclock_clock and struct pvclock_gtod_data - update_pvclock_gtod() and its seqcount-protected state copy - read_tsc() (KVM's private TSC reader with cycle_last clamping) - vgettsc() (KVM's private clocksource interpolation) - do_kvmclock_base(), do_monotonic(), do_realtime() Signed-off-by: David Woodhouse <[email protected]> Assisted-by: Kiro:claude-opus-4.6-1m --- Documentation/virt/kvm/devices/vcpu.rst | 4 +- arch/x86/kvm/vmx/vmx.c | 2 + arch/x86/kvm/x86.c | 177 +----------------- .../testing/selftests/kvm/x86/pvclock_test.c | 7 +- 4 files changed, 9 insertions(+), 181 deletions(-) diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 167aa4140d30..3d1a89c2b4f7 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -243,9 +243,9 @@ Returns: Specifies the guest's TSC offset relative to the host's TSC. The guest's TSC is then derived by the following equation: - guest_tsc = ((host_tsc * tsc_scale_ratio) >> tsc_scale_bits) + KVM_VCPU_TSC_OFFSET + guest_tsc = ((host_tsc * tsc_ratio) >> tsc_frac_bits) + KVM_VCPU_TSC_OFFSET -The values of tsc_scale_ratio and tsc_scale_bits can be obtained using +The values of tsc_ratio and tsc_frac_bits can be obtained using the KVM_VCPU_TSC_SCALE attribute. 
This attribute is useful to adjust the guest's TSC on live migration, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ed207cc7692d..1aaf3924a799 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8674,6 +8674,8 @@ __init int vmx_hardware_setup(void) if (cpu_has_vmx_tsc_scaling() && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) kvm_caps.has_tsc_control = true; + else + vmcs_config.cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; kvm_caps.max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; kvm_caps.tsc_scaling_ratio_frac_bits = 48; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 93a428c37847..966057913366 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2347,58 +2347,6 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) return kvm_set_msr_ignored_check(vcpu, index, *data, true); } -struct pvclock_clock { - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - u64 base_cycles; - u64 offset; -}; - -struct pvclock_gtod_data { - seqcount_t seq; - - struct pvclock_clock clock; /* extract of a clocksource struct */ - struct pvclock_clock raw_clock; /* extract of a clocksource struct */ - - ktime_t offs_boot; - u64 wall_time_sec; -}; - -static struct pvclock_gtod_data pvclock_gtod_data; - -static void update_pvclock_gtod(struct timekeeper *tk) -{ - struct pvclock_gtod_data *vdata = &pvclock_gtod_data; - - write_seqcount_begin(&vdata->seq); - - /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode; - vdata->clock.cycle_last = tk->tkr_mono.cycle_last; - vdata->clock.mask = tk->tkr_mono.mask; - vdata->clock.mult = tk->tkr_mono.mult; - vdata->clock.shift = tk->tkr_mono.shift; - vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec; - vdata->clock.offset = tk->tkr_mono.base; - - vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode; - vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last; - vdata->raw_clock.mask = 
tk->tkr_raw.mask; - vdata->raw_clock.mult = tk->tkr_raw.mult; - vdata->raw_clock.shift = tk->tkr_raw.shift; - vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec; - vdata->raw_clock.offset = tk->tkr_raw.base; - - vdata->wall_time_sec = tk->xtime_sec; - - vdata->offs_boot = tk->offs_boot; - - write_seqcount_end(&vdata->seq); -} - static s64 get_kvmclock_base_ns(void) { /* Count up from boot time, but with the frequency of the raw clock. */ @@ -3037,128 +2985,6 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) #ifdef CONFIG_X86_64 -static u64 read_tsc(void) -{ - u64 ret = (u64)rdtsc_ordered(); - u64 last = pvclock_gtod_data.clock.cycle_last; - - if (likely(ret >= last)) - return ret; - - /* - * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a function of time and the likely is - * very likely) and there's a data dependence, so force GCC - * to generate a branch instead. I don't barrier() because - * we don't actually need a barrier, and if this function - * ever gets inlined it will generate worse code. 
- */ - asm volatile (""); - return last; -} - -static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp, - int *mode) -{ - u64 tsc_pg_val; - long v; - - switch (clock->vclock_mode) { - case VDSO_CLOCKMODE_HVCLOCK: - if (hv_read_tsc_page_tsc(hv_get_tsc_page(), - tsc_timestamp, &tsc_pg_val)) { - /* TSC page valid */ - *mode = VDSO_CLOCKMODE_HVCLOCK; - v = (tsc_pg_val - clock->cycle_last) & - clock->mask; - } else { - /* TSC page invalid */ - *mode = VDSO_CLOCKMODE_NONE; - } - break; - case VDSO_CLOCKMODE_TSC: - *mode = VDSO_CLOCKMODE_TSC; - *tsc_timestamp = read_tsc(); - v = (*tsc_timestamp - clock->cycle_last) & - clock->mask; - break; - default: - *mode = VDSO_CLOCKMODE_NONE; - } - - if (*mode == VDSO_CLOCKMODE_NONE) - *tsc_timestamp = v = 0; - - return v * clock->mult; -} - -/* - * As with get_kvmclock_base_ns(), this counts from boot time, at the - * frequency of CLOCK_MONOTONIC_RAW (hence adding gtos->offs_boot). - */ -static int do_kvmclock_base(s64 *t, u64 *tsc_timestamp) -{ - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - unsigned long seq; - int mode; - u64 ns; - - do { - seq = read_seqcount_begin(&gtod->seq); - ns = gtod->raw_clock.base_cycles; - ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode); - ns >>= gtod->raw_clock.shift; - ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot)); - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); - *t = ns; - - return mode; -} - -/* - * This calculates CLOCK_MONOTONIC at the time of the TSC snapshot, with - * no boot time offset. 
- */ -static int do_monotonic(s64 *t, u64 *tsc_timestamp) -{ - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - unsigned long seq; - int mode; - u64 ns; - - do { - seq = read_seqcount_begin(&gtod->seq); - ns = gtod->clock.base_cycles; - ns += vgettsc(&gtod->clock, tsc_timestamp, &mode); - ns >>= gtod->clock.shift; - ns += ktime_to_ns(gtod->clock.offset); - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); - *t = ns; - - return mode; -} - -static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) -{ - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; - unsigned long seq; - int mode; - u64 ns; - - do { - seq = read_seqcount_begin(&gtod->seq); - ts->tv_sec = gtod->wall_time_sec; - ns = gtod->clock.base_cycles; - ns += vgettsc(&gtod->clock, tsc_timestamp, &mode); - ns >>= gtod->clock.shift; - } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return mode; -} - /* * Calculates the kvmclock_base_ns (CLOCK_MONOTONIC_RAW + boot time) and * reports the TSC value from which it do so. Returns true if host is @@ -6231,7 +6057,7 @@ static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, break; } case KVM_VCPU_TSC_SCALE: - r = -EINVAL; /* Read only */ + r = kvm_caps.has_tsc_control ? 
-EINVAL : -ENXIO; break; default: r = -ENXIO; @@ -10405,7 +10231,6 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, { struct timekeeper *tk = priv; - update_pvclock_gtod(tk); #ifdef CONFIG_X86_64 kvm_host_has_tsc_clocksource = diff --git a/tools/testing/selftests/kvm/x86/pvclock_test.c b/tools/testing/selftests/kvm/x86/pvclock_test.c index aecd62fc8a93..4c1869fa482e 100644 --- a/tools/testing/selftests/kvm/x86/pvclock_test.c +++ b/tools/testing/selftests/kvm/x86/pvclock_test.c @@ -14,7 +14,6 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" -#include "apic.h" #include <asm/pvclock-abi.h> @@ -262,10 +261,12 @@ int main(int argc, char *argv[]) return 0; } +static volatile uint32_t vcpu_counter; + static void guest_code_stable_bit(void) { - uint32_t apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); - uint64_t gpa = KVMCLOCK_GPA + apic_id * sizeof(struct pvclock_vcpu_time_info); + uint32_t idx = __atomic_fetch_add(&vcpu_counter, 1, __ATOMIC_SEQ_CST); + uint64_t gpa = KVMCLOCK_GPA + idx * sizeof(struct pvclock_vcpu_time_info); wrmsr(MSR_KVM_SYSTEM_TIME_NEW, gpa | KVM_MSR_ENABLED); GUEST_SYNC(0); -- 2.54.0

