Hi, Here is a fourth diff addressing all the issues so far, that have been mainly pointed out by kettenis@, thanks!
Changes: - stop resetting the observed drift as it does not affect tsc re-initialization on resume, thus removing all changes from acpi_machdep.c - fix comment and put a temporary pretty printf of resume - rename cpu_cc_skew to ci_tsc_skew - remove unfinished code using MSR_TSC for synchronization (to be added later on together with the missing IA32_TSC_ADJUST wrmsr commands) All other technical issues were discussed and settled in private and require no change to the former diff. For testing you can also use the regress test after booting with tsc as default clock and waiting for an hour or so to let the clocks go wild: # cd /usr/src/regress/sys/kern/clock_gettime # make regress There is another test program flying around the mailing lists I guess, but I could not locate it now so if someone is kind enough to reply with the code, that would be lovely! Paul Index: arch/amd64/amd64/cpu.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137 +++ arch/amd64/amd64/cpu.c 6 Aug 2019 20:19:27 -0000 @@ -754,6 +754,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (cold && !CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +812,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,6 +833,18 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* + * Synchronize time stamp counters. Invalidate cache and + * synchronize twice (in tsc_sync_bp) to minimize possible + * cache effects. Disable interrupts to try and rule out any + * external interference. + */ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); } if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { @@ -852,6 +869,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +883,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else if (cold) { + /* Synchronize TSC again, check for drift. */ + drift = ci->ci_tsc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->ci_tsc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->ci_tsc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +918,14 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* + * Synchronize the TSC for the first time. Note that interrupts are + * off at this point. + */ + wbinvd(); ci->ci_flags |= CPUF_PRESENT; + ci->ci_tsc_skew = 0; /* reset on resume */ + tsc_sync_ap(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/tsc.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v retrieving revision 1.11 diff -u -p -u -p -r1.11 tsc.c --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11 +++ arch/amd64/amd64/tsc.c 6 Aug 2019 20:19:27 -0000 @@ -1,8 +1,10 @@ /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ /* + * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2016,2017 Reyk Floeter <r...@openbsd.org> * Copyright (c) 2017 Adam Steen <a...@adamsteen.com.au> * Copyright (c) 2017 Mike Belopuhov <m...@openbsd.org> + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +22,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/timetc.h> +#include <sys/atomic.h> #include <machine/cpu.h> #include <machine/cpufunc.h> @@ -33,6 +36,12 @@ int tsc_recalibrate; uint64_t tsc_frequency; int tsc_is_invariant; +int64_t tsc_drift_max = 250; /* max cycles */ +int64_t tsc_drift_observed; + +volatile int64_t tsc_sync_val; +volatile struct cpu_info *tsc_sync_cpu; + uint tsc_get_timecount(struct timecounter *tc); struct timecounter tsc_timecounter = { @@ -172,10 +181,8 @@ calibrate_tsc_freq(void) return; tsc_frequency = freq; tsc_timecounter.tc_frequency = freq; -#ifndef MULTIPROCESSOR if (tsc_is_invariant) tsc_timecounter.tc_quality = 2000; -#endif } void @@ -194,14 +201,13 @@ cpu_recalibrate_tsc(struct timecounter * uint tsc_get_timecount(struct timecounter *tc) { - return rdtsc(); + return rdtsc() + curcpu()->ci_tsc_skew; } void tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq) { - if (!(ci->ci_flags & CPUF_PRIMARY) || - !(ci->ci_flags & CPUF_CONST_TSC) || + if (!(ci->ci_flags & CPUF_CONST_TSC) || !(ci->ci_flags & CPUF_INVAR_TSC)) return; @@ -211,9 +217,7 @@ tsc_timecounter_init(struct cpu_info *ci /* Newer CPUs don't require recalibration */ if (tsc_frequency > 0) { tsc_timecounter.tc_frequency = tsc_frequency; -#ifndef MULTIPROCESSOR tsc_timecounter.tc_quality = 2000; -#endif } else { tsc_recalibrate = 1; tsc_frequency = cpufreq; @@ -221,5 +225,109 @@ tsc_timecounter_init(struct cpu_info *ci calibrate_tsc_freq(); } - tc_init(&tsc_timecounter); + if (tsc_drift_observed > tsc_drift_max) { + printf("ERROR: %lld cycle TSC drift observed\n", + (long long)tsc_drift_observed); + tsc_timecounter.tc_quality = -1000; + tsc_is_invariant = 0; + } + + printf("%s: TSC skew=%lld observed drift=%lld\n", __func__, + (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed); + + if (ci->ci_flags & CPUF_PRIMARY) + tc_init(&tsc_timecounter); +} + +static uint64_t +cpu_counter_serializing(struct cpu_info *ci) +{ + return (rdtsc() + ci->ci_tsc_skew); +} + +/* + * Record drift (in clock cycles). Called during AP startup. + */ +void +tsc_sync_drift(int64_t drift) +{ + if (drift < 0) + drift = -drift; + if (drift > tsc_drift_observed) + tsc_drift_observed = drift; +} + +/* + * Called during startup of APs, by the boot processor. Interrupts + * are disabled on entry. + */ +static void +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp) +{ + uint64_t bptsc; + + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) + panic("tsc_sync_bp: 1"); + + /* Flag it and read our TSC. */ + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC); + bptsc = cpu_counter_serializing(ci) >> 1; + + /* Wait for remote to complete, and read ours again. */ + while ((ci->ci_flags & CPUF_SYNCTSC) != 0) + membar_consumer(); + bptsc += (cpu_counter_serializing(ci) >> 1); + + /* Wait for the results to come in. */ + while (tsc_sync_cpu == ci) + CPU_BUSY_CYCLE(); + if (tsc_sync_cpu != NULL) + panic("tsc_sync_bp: 2"); + + *bptscp = bptsc; + *aptscp = tsc_sync_val; +} + +void +tsc_sync_bp(struct cpu_info *ci) +{ + uint64_t bptsc, aptsc; + + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */ + tsc_read_bp(ci, &bptsc, &aptsc); + + /* Compute final value to adjust for skew. */ + ci->ci_tsc_skew = bptsc - aptsc; +} + +/* + * Called during startup of AP, by the AP itself. Interrupts are + * disabled on entry. + */ +static void +tsc_post_ap(struct cpu_info *ci) +{ + uint64_t tsc; + + /* Wait for go-ahead from primary. */ + while ((ci->ci_flags & CPUF_SYNCTSC) == 0) + membar_consumer(); + tsc = (cpu_counter_serializing(ci) >> 1); + + /* Instruct primary to read its counter. */ + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC); + tsc += (cpu_counter_serializing(ci) >> 1); + + /* Post result. Ensure the whole value goes out atomically. */ + (void)atomic_swap_64(&tsc_sync_val, tsc); + + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) + panic("tsc_sync_ap"); +} + +void +tsc_sync_ap(struct cpu_info *ci) +{ + tsc_post_ap(ci); + tsc_post_ap(ci); } Index: arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.131 diff -u -p -u -p -r1.131 cpu.h --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131 +++ arch/amd64/include/cpu.h 6 Aug 2019 20:19:27 -0000 @@ -206,6 +206,8 @@ struct cpu_info { union vmm_cpu_cap ci_vmm_cap; paddr_t ci_vmxon_region_pa; struct vmxon_region *ci_vmxon_region; + + int64_t ci_tsc_skew; /* counter skew vs cpu0 */ }; #define CPUF_BSP 0x0001 /* CPU is the original BSP */ @@ -221,6 +223,7 @@ struct cpu_info { #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */ #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */ +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */ #define CPUF_PRESENT 0x1000 /* CPU is present */ #define CPUF_RUNNING 0x2000 /* CPU is running */ #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */ Index: arch/amd64/include/cpuvar.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v retrieving revision 1.9 diff -u -p -u -p -r1.9 cpuvar.h --- arch/amd64/include/cpuvar.h 6 Oct 2017 13:33:53 -0000 1.9 +++ arch/amd64/include/cpuvar.h 6 Aug 2019 20:19:27 -0000 @@ -98,4 +98,8 @@ void cpu_init(struct cpu_info *); void cpu_init_first(void); void cpu_adjust_tsc_freq(uint64_t (*)()); +void tsc_sync_drift(int64_t); +void tsc_sync_bp(struct cpu_info *); +void tsc_sync_ap(struct cpu_info *); + #endif