Hi, Scott

I tested your patch on my Ryzen7 box.
And I got failed message:

$ sysctl -a | grep tsc
kern.timecounter.choice=i8254(0) acpihpet0(1000) tsc(2000)
acpitimer0(1000)
machdep.tscfreq=3593244667
machdep.invarianttsc=1
$ sysctl kern.timecounter
kern.timecounter.tick=1
kern.timecounter.timestepwarnings=0
kern.timecounter.hardware=acpihpet0
kern.timecounter.choice=i8254(0) acpihpet0(1000) tsc(2000)
acpitimer0(1000)
$ dmesg | grep failed
tsc: cpu0/cpu2: sync test round 1/2 failed
tsc: cpu0/cpu4: sync test round 1/2 failed
tsc: cpu0/cpu5: sync test round 1/2 failed
tsc: cpu0/cpu6: sync test round 1/2 failed
tsc: cpu0/cpu7: sync test round 1/2 failed
$ 

dmesg:

OpenBSD 7.2-beta (GENERIC.MP) #10: Mon Aug  1 13:12:06 JST 2022
    a...@g2-obsd.my.domain:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 34256752640 (32669MB)
avail mem = 33201152000 (31663MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.8 @ 0xdb640000 (63 entries)
bios0: vendor American Megatrends Inc. version "9015" date 03/03/2020
bios0: MouseComputer Co.,Ltd. LM-AG400
acpi0 at bios0: ACPI 6.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP APIC FPDT FIDT SSDT MSDM SSDT SSDT MCFG HPET UEFI BGRT 
TPM2 IVRS PCCT SSDT CRAT CDIT SSDT SSDT SSDT
acpi0: wakeup devices GPP0(S4) GPP1(S4) GPP3(S4) GPP4(S4) GPP5(S4) GPP6(S4) 
GPP7(S4) GPP8(S4) X161(S4) GPP9(S4) X162(S4) GPPA(S4) GPPB(S4) GPPC(S4) 
GPPD(S4) GPPE(S4) [...]
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: AMD Ryzen 7 3700X 8-Core Processor, 3593.68 MHz, 17-71-00
cpu0: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, C-substates=1.1, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu1: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 4 (application processor)
tsc: cpu0/cpu2: sync test round 1/2 failed
tsc: cpu0/cpu2: cpu2: 161 lags 72 cycles
cpu2: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu2: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 6 (application processor)
cpu3: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu3: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu3: smt 0, core 3, package 0
cpu4 at mainbus0: apid 8 (application processor)
tsc: cpu0/cpu4: sync test round 1/2 failed
tsc: cpu0/cpu4: cpu4: 129 lags 1296 cycles
cpu4: AMD Ryzen 7 3700X 8-Core Processor, 3593.25 MHz, 17-71-00
cpu4: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu4: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu4: smt 0, core 4, package 0
cpu5 at mainbus0: apid 10 (application processor)
tsc: cpu0/cpu5: sync test round 1/2 failed
tsc: cpu0/cpu5: cpu5: 130 lags 1260 cycles
cpu5: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu5: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu5: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu5: smt 0, core 5, package 0
cpu6 at mainbus0: apid 12 (application processor)
tsc: cpu0/cpu6: sync test round 1/2 failed
tsc: cpu0/cpu6: cpu6: 132 lags 1332 cycles
cpu6: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu6: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu6: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu6: smt 0, core 6, package 0
cpu7 at mainbus0: apid 14 (application processor)
tsc: cpu0/cpu7: sync test round 1/2 failed
tsc: cpu0/cpu7: cpu7: 134 lags 1152 cycles
cpu7: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu7: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu7: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu7: smt 0, core 7, package 0
cpu8 at mainbus0: apid 1 (application processor)
cpu8: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu8: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu8: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu8: smt 1, core 0, package 0
cpu9 at mainbus0: apid 3 (application processor)
cpu9: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu9: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu9: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu9: smt 1, core 1, package 0
cpu10 at mainbus0: apid 5 (application processor)
cpu10: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu10: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu10: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu10: smt 1, core 2, package 0
cpu11 at mainbus0: apid 7 (application processor)
cpu11: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu11: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu11: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu11: smt 1, core 3, package 0
cpu12 at mainbus0: apid 9 (application processor)
cpu12: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu12: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu12: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu12: smt 1, core 4, package 0
cpu13 at mainbus0: apid 11 (application processor)
cpu13: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu13: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu13: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu13: smt 1, core 5, package 0
cpu14 at mainbus0: apid 13 (application processor)
cpu14: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu14: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu14: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu14: smt 1, core 6, package 0
cpu15 at mainbus0: apid 15 (application processor)
cpu15: AMD Ryzen 7 3700X 8-Core Processor, 3593.24 MHz, 17-71-00
cpu15: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,IBPB,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu15: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 512KB 64b/line 
8-way L2 cache, 16MB 64b/line 16-way L3 cache
cpu15: smt 1, core 7, package 0
ioapic0 at mainbus0: apid 17 pa 0xfec00000, version 21, 24 pins, can't remap
ioapic1 at mainbus0: apid 18 pa 0xfec01000, version 21, 32 pins, can't remap
acpimcfg0 at acpi0
acpimcfg0: addr 0xf8000000, bus 0-63
acpihpet0 at acpi0: 14318180 Hz
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (GPP0)
acpiprt2 at acpi0: bus -1 (GPP1)
acpiprt3 at acpi0: bus -1 (GPP3)
acpiprt4 at acpi0: bus -1 (GPP4)
acpiprt5 at acpi0: bus -1 (GPP5)
acpiprt6 at acpi0: bus -1 (GPP6)
acpiprt7 at acpi0: bus -1 (GPP7)
acpiprt8 at acpi0: bus 8 (GPP8)
acpiprt9 at acpi0: bus -1 (GPP9)
acpiprt10 at acpi0: bus -1 (GPPA)
acpiprt11 at acpi0: bus -1 (GPPB)
acpiprt12 at acpi0: bus -1 (GPPC)
acpiprt13 at acpi0: bus -1 (GPPD)
acpiprt14 at acpi0: bus -1 (GPPE)
acpiprt15 at acpi0: bus -1 (GPPF)
acpiprt16 at acpi0: bus -1 (GP10)
acpiprt17 at acpi0: bus 9 (GP12)
acpiprt18 at acpi0: bus 10 (GP13)
acpiprt19 at acpi0: bus 11 (GP30)
acpiprt20 at acpi0: bus 12 (GP31)
acpiprt21 at acpi0: bus 1 (GPP2)
acpipci0 at acpi0 PCI0: 0x00000010 0x00000011 0x00000000
acpicmos0 at acpi0
acpibtn0 at acpi0: PWRB
amdgpio0 at acpi0 GPIO uid 0 addr 0xfed81500/0x400 irq 7, 184 pins
tpm0 at acpi0 TPM_: unsupported TPM2 start method 2
"AMDIF030" at acpi0 not configured
"PNP0C14" at acpi0 not configured
"PNP0C14" at acpi0 not configured
acpicpu0 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu1 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu2 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu3 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu4 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu5 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu6 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu7 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu8 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu9 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu10 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu11 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu12 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu13 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu14 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
acpicpu15 at acpi0: C2(0@400 io@0x414), C1(0@1 mwait), PSS
cpu0: 3593 MHz: speeds: 3600 2800 2200 MHz
pci0 at mainbus0 bus 0
ksmn0 at pci0 dev 0 function 0 "AMD 17h Root Complex" rev 0x00
"AMD 17h IOMMU" rev 0x00 at pci0 dev 0 function 2 not configured
pchb0 at pci0 dev 1 function 0 "AMD 17h Host" rev 0x00
ppb0 at pci0 dev 1 function 3 "AMD 17h PCIE" rev 0x00: msi
pci1 at ppb0 bus 1
xhci0 at pci1 dev 0 function 0 vendor "AMD", unknown product 0x43d5 rev 0x01: 
msi, xHCI 1.10
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 
addr 1
ahci0 at pci1 dev 0 function 1 "AMD 400 Series AHCI" rev 0x01: msi, AHCI 1.3.1
ahci0: port busy after first PMP probe FIS
ahci0: port 0: 1.5Gb/s
ahci0: port busy after first PMP probe FIS
ahci0: port busy after first PMP probe FIS
ahci0: port 5: 6.0Gb/s
scsibus1 at ahci0: 32 targets
cd0 at scsibus1 targ 0 lun 0: <PIONEER, DVD-RW DVR-S21, PA01> removable
sd0 at scsibus1 targ 5 lun 0: <ATA, WDC WD20EARX-00P, 51.0> naa.50014ee207300787
sd0: 1907729MB, 512 bytes/sector, 3907029168 sectors
ppb1 at pci1 dev 0 function 2 "AMD 400 Series PCIE" rev 0x01
pci2 at ppb1 bus 2
ppb2 at pci2 dev 0 function 0 "AMD 400 Series PCIE" rev 0x01: msi
pci3 at ppb2 bus 3
ppb3 at pci2 dev 4 function 0 "AMD 400 Series PCIE" rev 0x01: msi
pci4 at ppb3 bus 4
ppb4 at pci2 dev 5 function 0 "AMD 400 Series PCIE" rev 0x01: msi
pci5 at ppb4 bus 5
ppb5 at pci2 dev 6 function 0 "AMD 400 Series PCIE" rev 0x01: msi
pci6 at ppb5 bus 6
em0 at pci6 dev 0 function 0 "Intel 82572EI" rev 0x06: apic 18 int 10, address 
00:15:17:d6:9c:39
ppb6 at pci2 dev 7 function 0 "AMD 400 Series PCIE" rev 0x01: msi
pci7 at ppb6 bus 7
re0 at pci7 dev 0 function 0 "Realtek 8168" rev 0x15: RTL8168H/8111H (0x5400), 
msi, address d4:5d:64:82:1c:f5
rgephy0 at re0 phy 7: RTL8251 PHY, rev. 0
pchb1 at pci0 dev 2 function 0 "AMD 17h Host" rev 0x00
pchb2 at pci0 dev 3 function 0 "AMD 17h Host" rev 0x00
ppb7 at pci0 dev 3 function 1 "AMD 17h PCIE" rev 0x00: msi
pci8 at ppb7 bus 8
vendor "NVIDIA", unknown product 0x2184 (class display subclass VGA, rev 0xa1) 
at pci8 dev 0 function 0 not configured
azalia0 at pci8 dev 0 function 1 vendor "NVIDIA", unknown product 0x1aeb rev 
0xa1: msi
azalia0: no supported codecs
xhci1 at pci8 dev 0 function 2 vendor "NVIDIA", unknown product 0x1aec rev 
0xa1: msi, xHCI 1.10
usb1 at xhci1: USB revision 3.0
uhub1 at usb1 configuration 1 interface 0 "NVIDIA xHCI root hub" rev 3.00/1.00 
addr 1
vendor "NVIDIA", unknown product 0x1aed (class serial bus unknown subclass 
0x80, rev 0xa1) at pci8 dev 0 function 3 not configured
pchb3 at pci0 dev 4 function 0 "AMD 17h Host" rev 0x00
pchb4 at pci0 dev 5 function 0 "AMD 17h Host" rev 0x00
pchb5 at pci0 dev 7 function 0 "AMD 17h Host" rev 0x00
ppb8 at pci0 dev 7 function 1 "AMD 17h PCIE" rev 0x00
pci9 at ppb8 bus 9
vendor "AMD", unknown product 0x148a (class instrumentation unknown subclass 
0x00, rev 0x00) at pci9 dev 0 function 0 not configured
pchb6 at pci0 dev 8 function 0 "AMD 17h Host" rev 0x00
ppb9 at pci0 dev 8 function 1 "AMD 17h PCIE" rev 0x00
pci10 at ppb9 bus 10
vendor "AMD", unknown product 0x1485 (class instrumentation unknown subclass 
0x00, rev 0x00) at pci10 dev 0 function 0 not configured
ccp0 at pci10 dev 0 function 1 "AMD 17h Crypto" rev 0x00
xhci2 at pci10 dev 0 function 3 "AMD 17h xHCI" rev 0x00: msi, xHCI 1.10
usb2 at xhci2: USB revision 3.0
uhub2 at usb2 configuration 1 interface 0 "AMD xHCI root hub" rev 3.00/1.00 
addr 1
azalia1 at pci10 dev 0 function 4 "AMD 17h HD Audio" rev 0x00: msi
azalia1: codecs: Realtek ALC887
audio0 at azalia1
ppb10 at pci0 dev 8 function 2 "AMD 17h PCIE" rev 0x00
pci11 at ppb10 bus 11
ahci1 at pci11 dev 0 function 0 "AMD FCH AHCI" rev 0x51: msi, AHCI 1.3.1
scsibus2 at ahci1: 32 targets
ppb11 at pci0 dev 8 function 3 "AMD 17h PCIE" rev 0x00
pci12 at ppb11 bus 12
ahci2 at pci12 dev 0 function 0 "AMD FCH AHCI" rev 0x51: msi, AHCI 1.3.1
ahci2: port 5: 6.0Gb/s
scsibus3 at ahci2: 32 targets
sd1 at scsibus3 targ 5 lun 0: <ATA, KINGSTON RBUSNS8, SBFK> naa.50026b768395b832
sd1: 488386MB, 512 bytes/sector, 1000215216 sectors, thin
piixpm0 at pci0 dev 20 function 0 "AMD FCH SMBus" rev 0x61: SMI
iic0 at piixpm0
iic0: addr 0x20 00=04 01=01 04=6a 06=6a 08=6a 09=38 0a=0f 0b=0e 0d=2f 0f=42 
10=64 13=58 20=04 21=01 22=12 24=6a 26=6a 28=6a 29=14 2a=0f 2b=0e 2d=38 2f=44 
32=58 40=16 41=14 42=8f 43=aa 45=04 46=01 47=03 48=05 50=0f 51=07 52=55 53=ff 
60=36 61=24 words 00=0402 01=0100 02=0000 03=00d4 04=6a00 05=00d4 06=6a00 
07=00d4
spdmem0 at iic0 addr 0x52: 16GB DDR4 SDRAM PC4-19200
spdmem1 at iic0 addr 0x53: 16GB DDR4 SDRAM PC4-19200
iic1 at piixpm0
pcib0 at pci0 dev 20 function 3 "AMD FCH LPC" rev 0x51
pchb7 at pci0 dev 24 function 0 "AMD 17h Data Fabric" rev 0x00
pchb8 at pci0 dev 24 function 1 "AMD 17h Data Fabric" rev 0x00
pchb9 at pci0 dev 24 function 2 "AMD 17h Data Fabric" rev 0x00
pchb10 at pci0 dev 24 function 3 "AMD 17h Data Fabric" rev 0x00
pchb11 at pci0 dev 24 function 4 "AMD 17h Data Fabric" rev 0x00
pchb12 at pci0 dev 24 function 5 "AMD 17h Data Fabric" rev 0x00
pchb13 at pci0 dev 24 function 6 "AMD 17h Data Fabric" rev 0x00
pchb14 at pci0 dev 24 function 7 "AMD 17h Data Fabric" rev 0x00
isa0 at pcib0
isadma0 at isa0
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
vmm0 at mainbus0: SVM/RVI
efifb0 at mainbus0: 1920x1080, 32bpp
wsdisplay0 at efifb0 mux 1: console (std, vt100 emulation), using wskbd0
wsdisplay0: screen 1-5 added (std, vt100 emulation)
umass0 at uhub0 port 6 configuration 1 interface 0 "JMicron USB to ATA/ATAPI 
bridge" rev 2.00/1.00 addr 2
umass0: using SCSI over Bulk-Only
scsibus4 at umass0: 2 targets, initiator 0
sd2 at scsibus4 targ 1 lun 0: <SanDisk, SSD PLUS 1000GB, > 
serial.152d232921230440806F
sd2: 953869MB, 512 bytes/sector, 1953525168 sectors
uhub3 at uhub2 port 4 configuration 1 interface 0 "Logitech Logitech BT 
Mini-Receiver" rev 2.00/1.01 addr 2
uhidev0 at uhub3 port 2 configuration 1 interface 0 "Logitech Logitech BT 
Mini-Receiver" rev 2.00/1.01 addr 3
uhidev0: iclass 3/1
ukbd0 at uhidev0: 8 variable keys, 6 key codes
wskbd1 at ukbd0 mux 1
wskbd1: connecting to wsdisplay0
uhidev1 at uhub3 port 3 configuration 1 interface 0 "Logitech Logitech BT 
Mini-Receiver" rev 2.00/1.01 addr 4
uhidev1: iclass 3/1, 18 report ids
uhidpp0 at uhidev1
ums0 at uhidev1 reportid 2: 12 buttons, Z and W dir
wsmouse0 at ums0 mux 0
ucc0 at uhidev1 reportid 3: 652 usages, 18 keys, array
wskbd2 at ucc0 mux 1
wskbd2: connecting to wsdisplay0
uhid0 at uhidev1 reportid 4: input=1, output=0, feature=0
uhid1 at uhidev1 reportid 18: input=45, output=45, feature=0
vscsi0 at root
scsibus5 at vscsi0: 256 targets
softraid0 at root
scsibus6 at softraid0: 256 targets
root on sd2a (0673f2106150cadd.a) swap on sd2b dump on sd2b
--
ASOU Masato

From: Scott Cheloha <scottchel...@gmail.com>
Date: Wed, 20 Jul 2022 17:11:36 -0500

> Hi,
> 
> Thanks to everyone who tested v3.
> 
> Attached is v4.  I would like to put this into snaps (bcc: deraadt@).
> 
> If you've been following along and testing these patches, feel free to
> continue testing.  If your results change from v3 to v4, please reply
> with what happened and your dmesg.
> 
> I made a few small changes from v3:
> 
> - Only run the sync test after failing it on TSC_DEBUG kernels.
>   For example, it would be a waste of time to run the sync test
>   for 62 other CPU pairs if the CPU0/CPU1 sync test failed.
> 
> - Pad the tsc_test_status struct by hand.  Try to keep
>   tsc_test_status.val onto its own cache line and try to prevent one
>   instance of the struct from sharing a cache line with another
>   instance.
> 
> I am looking for OKs.
> 
> Assuming the results from snaps testing aren't catastrophic, and this
> version is OK'd, I hope to commit this after a couple weeks in snaps.
> 
> There are two things I'm unsure about that I hope a reviewer will
> comment on:
> 
> - Do we need to keep the double-test?  IIUC the purpose of the
>   double-test is to check for drift.  But with this change we no
>   longer have a concept of drift.
> 
> - Is the LFENCE in tsc_test_ap()/tst_test_bp() sufficient
>   to ensure one TSC value predates the other?  Or do I need
>   to insert membar_consumer()/membar_producer() calls to
>   provide that guarantee?
> 
> -Scott
> 
> Index: sys/arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.24
> diff -u -p -r1.24 tsc.c
> --- sys/arch/amd64/amd64/tsc.c        31 Aug 2021 15:11:54 -0000      1.24
> +++ sys/arch/amd64/amd64/tsc.c        20 Jul 2022 21:58:40 -0000
> @@ -36,13 +36,6 @@ int                tsc_recalibrate;
>  uint64_t     tsc_frequency;
>  int          tsc_is_invariant;
>  
> -#define      TSC_DRIFT_MAX                   250
> -#define TSC_SKEW_MAX                 100
> -int64_t      tsc_drift_observed;
> -
> -volatile int64_t     tsc_sync_val;
> -volatile struct cpu_info     *tsc_sync_cpu;
> -
>  u_int                tsc_get_timecount(struct timecounter *tc);
>  void         tsc_delay(int usecs);
>  
> @@ -236,22 +229,12 @@ cpu_recalibrate_tsc(struct timecounter *
>  u_int
>  tsc_get_timecount(struct timecounter *tc)
>  {
> -     return rdtsc_lfence() + curcpu()->ci_tsc_skew;
> +     return rdtsc_lfence();
>  }
>  
>  void
>  tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
>  {
> -#ifdef TSC_DEBUG
> -     printf("%s: TSC skew=%lld observed drift=%lld\n", ci->ci_dev->dv_xname,
> -         (long long)ci->ci_tsc_skew, (long long)tsc_drift_observed);
> -#endif
> -     if (ci->ci_tsc_skew < -TSC_SKEW_MAX || ci->ci_tsc_skew > TSC_SKEW_MAX) {
> -             printf("%s: disabling user TSC (skew=%lld)\n",
> -                 ci->ci_dev->dv_xname, (long long)ci->ci_tsc_skew);
> -             tsc_timecounter.tc_user = 0;
> -     }
> -
>       if (!(ci->ci_flags & CPUF_PRIMARY) ||
>           !(ci->ci_flags & CPUF_CONST_TSC) ||
>           !(ci->ci_flags & CPUF_INVAR_TSC))
> @@ -268,111 +251,276 @@ tsc_timecounter_init(struct cpu_info *ci
>               calibrate_tsc_freq();
>       }
>  
> -     if (tsc_drift_observed > TSC_DRIFT_MAX) {
> -             printf("ERROR: %lld cycle TSC drift observed\n",
> -                 (long long)tsc_drift_observed);
> -             tsc_timecounter.tc_quality = -1000;
> -             tsc_timecounter.tc_user = 0;
> -             tsc_is_invariant = 0;
> -     }
> -
>       tc_init(&tsc_timecounter);
>  }
>  
> -/*
> - * Record drift (in clock cycles).  Called during AP startup.
> - */
>  void
> -tsc_sync_drift(int64_t drift)
> +tsc_delay(int usecs)
>  {
> -     if (drift < 0)
> -             drift = -drift;
> -     if (drift > tsc_drift_observed)
> -             tsc_drift_observed = drift;
> +     uint64_t interval, start;
> +
> +     interval = (uint64_t)usecs * tsc_frequency / 1000000;
> +     start = rdtsc_lfence();
> +     while (rdtsc_lfence() - start < interval)
> +             CPU_BUSY_CYCLE();
>  }
>  
> +#ifdef MULTIPROCESSOR
> +
> +#define TSC_DEBUG 1
> +
> +/*
> + * Protections for global variables in this code:
> + *
> + *   a       Modified atomically
> + *   b       Protected by a barrier
> + *   p       Only modified by the primary CPU
> + */
> +
> +#define TSC_TEST_MS          1       /* Test round duration */
> +#define TSC_TEST_ROUNDS              2       /* Number of test rounds */
> +
>  /*
> - * Called during startup of APs, by the boot processor.  Interrupts
> - * are disabled on entry.
> + * tsc_test_status.val is cacheline-aligned (64-byte) to limit
> + * false sharing during the test and reduce our margin of error.
>   */
> +struct tsc_test_status {
> +     volatile uint64_t val;          /* [b] latest RDTSC value */
> +     uint64_t pad1[7];
> +     uint64_t lag_count;             /* [b] number of lags seen by CPU */
> +     uint64_t lag_max;               /* [b] Biggest lag seen */
> +     int64_t adj;                    /* [b] initial IA32_TSC_ADJUST value */
> +     uint64_t pad2[5];
> +} __aligned(64);
> +struct tsc_test_status tsc_ap_status;        /* [b] Test results from AP */
> +struct tsc_test_status tsc_bp_status;        /* [p] Test results from BP */
> +uint64_t tsc_test_cycles;            /* [p] TSC cycles per test round */
> +const char *tsc_ap_name;             /* [b] Name of AP */
> +volatile u_int tsc_egress_barrier;   /* [a] Test end barrier */
> +volatile u_int tsc_ingress_barrier;  /* [a] Test start barrier */
> +volatile u_int tsc_test_rounds;              /* [p] Remaining test rounds */
> +int tsc_is_synchronized = 1;         /* [p] TSC sync'd across all CPUs? */
> +
> +void tsc_report_test_results(void);
> +void tsc_reset_adjust(struct tsc_test_status *);
> +void tsc_test_ap(void);
> +void tsc_test_bp(void);
> +
>  void
> -tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +tsc_test_sync_bp(struct cpu_info *ci)
>  {
> -     uint64_t bptsc;
> -
> -     if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL)
> -             panic("tsc_sync_bp: 1");
> +     /* TSC must be constant and invariant to use it as a timecounter. */
> +     if (!tsc_is_invariant)
> +             return;
> +#ifndef TSC_DEBUG
> +     /* No point in testing again if we already failed. */
> +     if (!tsc_is_synchronized)
> +             return;
> +#endif
> +     /* Reset IA32_TSC_ADJUST if it exists. */
> +     tsc_reset_adjust(&tsc_bp_status);
>  
> -     /* Flag it and read our TSC. */
> -     atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> -     bptsc = (rdtsc_lfence() >> 1);
> +     /* Reset the test cycle limit and round count. */
> +     tsc_test_cycles = TSC_TEST_MS * tsc_frequency / 1000;
> +     tsc_test_rounds = TSC_TEST_ROUNDS;
> +
> +     do {
> +             /*
> +              * Pass through the ingress barrier, run the test,
> +              * then wait for the AP to reach the egress barrier.
> +              */
> +             atomic_inc_int(&tsc_ingress_barrier);
> +             while (tsc_ingress_barrier != 2)
> +                     CPU_BUSY_CYCLE();
> +             tsc_test_bp();
> +             while (tsc_egress_barrier != 1)
> +                     CPU_BUSY_CYCLE();
> +
> +             /*
> +              * Report what happened.  Adjust the timecounter quality
> +              * if we failed the test.
> +              *
> +              * XXX We need a tc_detach() function to cleanly
> +              * disable the timecounter.  Lowering the quality
> +              * like this is a nasty hack.
> +              */
> +             tsc_report_test_results();
> +             if (tsc_ap_status.lag_count || tsc_bp_status.lag_count) {
> +                     tsc_timecounter.tc_quality = -1000;
> +                     tsc_is_synchronized = 0;
> +                     tsc_test_rounds = 0;
> +             } else
> +                     tsc_test_rounds--;
> +
> +             /*
> +              * Clean up for the next round.
> +              *
> +              * It is safe to reset the ingress barrier because
> +              * at this point we know the AP has reached the egress
> +              * barrier.
> +              */
> +             memset(&tsc_ap_status, 0, sizeof tsc_ap_status);
> +             memset(&tsc_bp_status, 0, sizeof tsc_bp_status);
> +             tsc_ingress_barrier = 0;
> +             if (tsc_test_rounds == 0)
> +                     tsc_ap_name = NULL;
> +
> +             /*
> +              * Pass through the egress barrier and release the AP.
> +              * The AP is responsible for resetting the egress barrier.
> +              */
> +             if (atomic_inc_int_nv(&tsc_egress_barrier) != 2)
> +                     panic("%s: unexpected egress count", __func__);
> +     } while (tsc_test_rounds > 0);
> +}
>  
> -     /* Wait for remote to complete, and read ours again. */
> -     while ((ci->ci_flags & CPUF_SYNCTSC) != 0)
> -             membar_consumer();
> -     bptsc += (rdtsc_lfence() >> 1);
> +void
> +tsc_test_sync_ap(struct cpu_info *ci)
> +{
> +     if (!tsc_is_invariant)
> +             return;
> +#ifndef TSC_DEBUG
> +     if (!tsc_is_synchronized)
> +             return;
> +#endif
> +     /* The BP needs our name in order to report any problems. */
> +     tsc_ap_name = ci->ci_dev->dv_xname;
>  
> -     /* Wait for the results to come in. */
> -     while (tsc_sync_cpu == ci)
> -             CPU_BUSY_CYCLE();
> -     if (tsc_sync_cpu != NULL)
> -             panic("tsc_sync_bp: 2");
> +     tsc_reset_adjust(&tsc_ap_status);
>  
> -     *bptscp = bptsc;
> -     *aptscp = tsc_sync_val;
> +     /*
> +      * The AP is only responsible for running the test and
> +      * resetting the egress barrier.  The BP handles everything
> +      * else.
> +      */
> +     do {
> +             atomic_inc_int(&tsc_ingress_barrier);
> +             while (tsc_ingress_barrier != 2)
> +                     CPU_BUSY_CYCLE();
> +             tsc_test_ap();
> +             atomic_inc_int(&tsc_egress_barrier);
> +             while (atomic_cas_uint(&tsc_egress_barrier, 2, 0) != 2)
> +                     CPU_BUSY_CYCLE();
> +     } while (tsc_test_rounds > 0);
>  }
>  
>  void
> -tsc_sync_bp(struct cpu_info *ci)
> +tsc_report_test_results(void)
>  {
> -     uint64_t bptsc, aptsc;
> -
> -     tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> -     tsc_read_bp(ci, &bptsc, &aptsc);
> +     u_int round = TSC_TEST_ROUNDS - tsc_test_rounds + 1;
>  
> -     /* Compute final value to adjust for skew. */
> -     ci->ci_tsc_skew = bptsc - aptsc;
> +     if (tsc_bp_status.adj != 0) {
> +             printf("tsc: cpu0: IA32_TSC_ADJUST: %lld -> 0\n",
> +                 tsc_bp_status.adj);
> +     }
> +     if (tsc_ap_status.adj != 0) {
> +             printf("tsc: %s: IA32_TSC_ADJUST: %lld -> 0\n",
> +                 tsc_ap_name, tsc_ap_status.adj);
> +     }
> +     if (tsc_ap_status.lag_count > 0 || tsc_bp_status.lag_count > 0) {
> +             printf("tsc: cpu0/%s: sync test round %u/%u failed\n",
> +                 tsc_ap_name, round, TSC_TEST_ROUNDS);
> +     }
> +     if (tsc_bp_status.lag_count > 0) {
> +             printf("tsc: cpu0/%s: cpu0: %llu lags %llu cycles\n",
> +                 tsc_ap_name, tsc_bp_status.lag_count,
> +                 tsc_bp_status.lag_max);
> +     }
> +     if (tsc_ap_status.lag_count > 0) {
> +             printf("tsc: cpu0/%s: %s: %llu lags %llu cycles\n",
> +                 tsc_ap_name, tsc_ap_name, tsc_ap_status.lag_count,
> +                 tsc_ap_status.lag_max);
> +     }
>  }
>  
>  /*
> - * Called during startup of AP, by the AP itself.  Interrupts are
> - * disabled on entry.
> + * Reset IA32_TSC_ADJUST if we have it.
> + *
> + * XXX We should rearrange cpu_hatch() so that the feature flags
> + * are already set before we get here.  Do CPUID() and CPUID_LEAF()
> + * here as a workaround until then.
>   */
>  void
> -tsc_post_ap(struct cpu_info *ci)
> +tsc_reset_adjust(struct tsc_test_status *tts)
>  {
> -     uint64_t tsc;
> -
> -     /* Wait for go-ahead from primary. */
> -     while ((ci->ci_flags & CPUF_SYNCTSC) == 0)
> -             membar_consumer();
> -     tsc = (rdtsc_lfence() >> 1);
> -
> -     /* Instruct primary to read its counter. */
> -     atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> -     tsc += (rdtsc_lfence() >> 1);
> -
> -     /* Post result.  Ensure the whole value goes out atomically. */
> -     (void)atomic_swap_64(&tsc_sync_val, tsc);
> +     uint32_t eax, ebx, ecx, edx;
>  
> -     if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci)
> -             panic("tsc_sync_ap");
> +     CPUID(0, eax, ebx, ecx, edx);
> +     if (eax >= 7) {
> +             CPUID_LEAF(7, 0, eax, ebx, ecx, edx);
> +             if (ISSET(ebx, SEFF0EBX_TSC_ADJUST)) {
> +                     tts->adj = rdmsr(MSR_TSC_ADJUST);
> +                     if (tts->adj != 0)
> +                             wrmsr(MSR_TSC_ADJUST, 0);
> +             }
> +     }
>  }
>  
>  void
> -tsc_sync_ap(struct cpu_info *ci)
> +tsc_test_ap(void)
>  {
> -     tsc_post_ap(ci);
> -     tsc_post_ap(ci);
> +     uint64_t ap_val, bp_val, end, lag;
> +     u_int i = 0;
> +
> +     ap_val = rdtsc_lfence();
> +     end = ap_val + tsc_test_cycles;
> +     while (ap_val < end) {
> +             /*
> +              * The LFENCE ensures bp_val predates ap_val.  If ap_val
> +              * is smaller than bp_val then the AP's TSC must lag that
> +              * of the BP and the counters cannot be synchronized.
> +              */
> +             bp_val = tsc_bp_status.val;
> +             ap_val = rdtsc_lfence();
> +             tsc_ap_status.val = ap_val;
> +
> +             /*
> +              * Ensure the other CPU has a chance to run.  This is
> +              * crucial if the other CPU is our SMT sibling.  SMT
> +              * starvation can prevent this test from detecting
> +              * relatively large lags.  Eight is an arbitrary value,
> +              * but it seems to work in practice without impacting
> +              * the sensitivity of the test for non-sibling threads.
> +              */
> +             if (++i % 8 == 0)
> +                     CPU_BUSY_CYCLE();
> +
> +             /*
> +              * Record the magnitude of the problem if our TSC lags
> +              * the other.
> +              */
> +             if (__predict_false(ap_val < bp_val)) {
> +                     tsc_ap_status.lag_count++;
> +                     lag = bp_val - ap_val;
> +                     if (tsc_ap_status.lag_max < lag)
> +                             tsc_ap_status.lag_max = lag;
> +             }
> +     }
>  }
>  
>  void
> -tsc_delay(int usecs)
> +tsc_test_bp(void)
>  {
> -     uint64_t interval, start;
> +     uint64_t ap_val, bp_val, end, lag;
> +     u_int i = 0;
>  
> -     interval = (uint64_t)usecs * tsc_frequency / 1000000;
> -     start = rdtsc_lfence();
> -     while (rdtsc_lfence() - start < interval)
> -             CPU_BUSY_CYCLE();
> +     bp_val = rdtsc_lfence();
> +     end = bp_val + tsc_test_cycles;
> +     while (bp_val < end) {
> +             ap_val = tsc_ap_status.val;
> +             bp_val = rdtsc_lfence();
> +             tsc_bp_status.val = bp_val;
> +
> +             if (++i % 8 == 0)
> +                     CPU_BUSY_CYCLE();
> +
> +             if (__predict_false(bp_val < ap_val)) {
> +                     tsc_bp_status.lag_count++;
> +                     lag = ap_val - bp_val;
> +                     if (tsc_bp_status.lag_max < lag)
> +                             tsc_bp_status.lag_max = lag;
> +             }
> +     }
>  }
> +
> +#endif /* MULTIPROCESSOR */
> Index: sys/arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.156
> diff -u -p -r1.156 cpu.c
> --- sys/arch/amd64/amd64/cpu.c        26 Apr 2022 08:35:30 -0000      1.156
> +++ sys/arch/amd64/amd64/cpu.c        20 Jul 2022 21:58:40 -0000
> @@ -772,9 +772,9 @@ cpu_init(struct cpu_info *ci)
>       lcr4(cr4 & ~CR4_PGE);
>       lcr4(cr4);
>  
> -     /* Synchronize TSC */
> +     /* Check if TSC is synchronized. */
>       if (cold && !CPU_IS_PRIMARY(ci))
> -           tsc_sync_ap(ci);
> +           tsc_test_sync_ap(ci);
>  #endif
>  }
>  
> @@ -854,18 +854,14 @@ cpu_start_secondary(struct cpu_info *ci)
>  #endif
>       } else {
>               /*
> -              * Synchronize time stamp counters. Invalidate cache and
> -              * synchronize twice (in tsc_sync_bp) to minimize possible
> -              * cache effects. Disable interrupts to try and rule out any
> -              * external interference.
> +              * Test if TSCs are synchronized.  Invalidate cache to
> +              * minimize possible cache effects.  Disable interrupts to
> +              * try to rule out external interference.
>                */
>               s = intr_disable();
>               wbinvd();
> -             tsc_sync_bp(ci);
> +             tsc_test_sync_bp(ci);
>               intr_restore(s);
> -#ifdef TSC_DEBUG
> -             printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew);
> -#endif
>       }
>  
>       if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
> @@ -890,7 +886,6 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>       int i;
> -     int64_t drift;
>       u_long s;
>  
>       atomic_setbits_int(&ci->ci_flags, CPUF_GO);
> @@ -905,18 +900,11 @@ cpu_boot_secondary(struct cpu_info *ci)
>               db_enter();
>  #endif
>       } else if (cold) {
> -             /* Synchronize TSC again, check for drift. */
> -             drift = ci->ci_tsc_skew;
> +             /* Test if TSCs are synchronized again. */
>               s = intr_disable();
>               wbinvd();
> -             tsc_sync_bp(ci);
> +             tsc_test_sync_bp(ci);
>               intr_restore(s);
> -             drift -= ci->ci_tsc_skew;
> -#ifdef TSC_DEBUG
> -             printf("TSC skew=%lld drift=%lld\n",
> -                 (long long)ci->ci_tsc_skew, (long long)drift);
> -#endif
> -             tsc_sync_drift(drift);
>       }
>  }
>  
> @@ -942,13 +930,12 @@ cpu_hatch(void *v)
>  #endif
>  
>       /*
> -      * Synchronize the TSC for the first time. Note that interrupts are
> -      * off at this point.
> +      * Test if our TSC is synchronized for the first time.
> +      * Note that interrupts are off at this point.
>        */
>       wbinvd();
>       ci->ci_flags |= CPUF_PRESENT;
> -     ci->ci_tsc_skew = 0;    /* reset on resume */
> -     tsc_sync_ap(ci);
> +     tsc_test_sync_ap(ci);
>  
>       lapic_enable();
>       lapic_startclock();
> Index: sys/arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.145
> diff -u -p -r1.145 cpu.h
> --- sys/arch/amd64/include/cpu.h      12 Jul 2022 04:46:00 -0000      1.145
> +++ sys/arch/amd64/include/cpu.h      20 Jul 2022 21:58:40 -0000
> @@ -207,8 +207,6 @@ struct cpu_info {
>       paddr_t         ci_vmxon_region_pa;
>       struct vmxon_region *ci_vmxon_region;
>  
> -     int64_t         ci_tsc_skew;            /* counter skew vs cpu0 */
> -
>       char            ci_panicbuf[512];
>  
>       paddr_t         ci_vmcs_pa;
> @@ -228,7 +226,6 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC       0x0100          /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE      0x0200          /* CPU has curproc's xsave 
> state */
>  
> -#define CPUF_SYNCTSC 0x0800          /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000          /* CPU is present */
>  #define CPUF_RUNNING 0x2000          /* CPU is running */
>  #define CPUF_PAUSE   0x4000          /* CPU is paused in DDB */
> Index: sys/arch/amd64/include/cpuvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpuvar.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 cpuvar.h
> --- sys/arch/amd64/include/cpuvar.h   16 May 2021 04:33:05 -0000      1.11
> +++ sys/arch/amd64/include/cpuvar.h   20 Jul 2022 21:58:40 -0000
> @@ -97,8 +97,7 @@ void identifycpu(struct cpu_info *);
>  void cpu_init(struct cpu_info *);
>  void cpu_init_first(void);
>  
> -void tsc_sync_drift(int64_t);
> -void tsc_sync_bp(struct cpu_info *);
> -void tsc_sync_ap(struct cpu_info *);
> +void tsc_test_sync_bp(struct cpu_info *);
> +void tsc_test_sync_ap(struct cpu_info *);
>  
>  #endif
> 

Reply via email to