Add dedicated hypervisor hooks for getting known TSC/CPU frequencies instead of overriding seemingly generic platform hooks, and explicitly prioritize hypervisor-provided frequencies over native methods, but do NOT clobber the frequency obtained from trusted firmware. While shuffling the hooks around is arguably "six of one, half dozen of the other", scoping them to x86_hyper_init makes their purpose more obvious, and allows for explicitly defining the priority of sources (as is done here).
As is already done when trusted firmware provides the TSC frequency, ignore tsc_early_khz if the exact TSC frequency was obtained from the hypervisor, as attempting to refine the TSC frequency when running in a VM is all but guaranteed to cause problems sooner or later due to the calibration sources being emulated devices in the vast majority of setups. Cc: David Woodhouse <[email protected]> Signed-off-by: Sean Christopherson <[email protected]> --- .../admin-guide/kernel-parameters.txt | 3 +- arch/x86/include/asm/acrn.h | 5 ---- arch/x86/include/asm/x86_init.h | 4 +++ arch/x86/kernel/cpu/acrn.c | 10 +++++-- arch/x86/kernel/cpu/mshyperv.c | 6 ++-- arch/x86/kernel/cpu/vmware.c | 8 ++--- arch/x86/kernel/jailhouse.c | 6 ++-- arch/x86/kernel/kvmclock.c | 6 ++-- arch/x86/kernel/tsc.c | 29 ++++++++++++++----- arch/x86/xen/time.c | 4 +-- 10 files changed, 50 insertions(+), 31 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 490e6aa72fc2..a387bb2c47e2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7948,7 +7948,8 @@ Kernel parameters Note, tsc_early_khz is ignored if the TSC frequency is provided by trusted firmware when running as an SNP or - TDX guest. + TDX guest, or when the hypervisor provides the exact + frequency via a paravirtual interface. 
tsx= [X86] Control Transactional Synchronization Extensions (TSX) feature in Intel processors that diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h index db42b477c41d..a892179c61c6 100644 --- a/arch/x86/include/asm/acrn.h +++ b/arch/x86/include/asm/acrn.h @@ -32,11 +32,6 @@ static inline u32 acrn_cpuid_base(void) return 0; } -static inline unsigned long acrn_get_tsc_khz(void) -{ - return cpuid_eax(ACRN_CPUID_TIMING_INFO); -} - /* * Hypercalls for ACRN * diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 953d3199408a..0c89bf40f507 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -123,6 +123,8 @@ struct x86_init_pci { * @msi_ext_dest_id: MSI supports 15-bit APIC IDs * @init_mem_mapping: setup early mappings during init_mem_mapping() * @init_after_bootmem: guest init after boot allocator is finished + * @get_tsc_khz: get the TSC frequency (returns 0 if frequency is unknown) + * @get_cpu_khz: get the CPU frequency (returns 0 if frequency is unknown) */ struct x86_hyper_init { void (*init_platform)(void); @@ -131,6 +133,8 @@ struct x86_hyper_init { bool (*msi_ext_dest_id)(void); void (*init_mem_mapping)(void); void (*init_after_bootmem)(void); + unsigned int (*get_tsc_khz)(void); + unsigned int (*get_cpu_khz)(void); }; /** diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c index dc119af83524..ad8f2da8003b 100644 --- a/arch/x86/kernel/cpu/acrn.c +++ b/arch/x86/kernel/cpu/acrn.c @@ -24,13 +24,15 @@ static u32 __init acrn_detect(void) return acrn_cpuid_base(); } +static unsigned int __init acrn_get_tsc_khz(void) +{ + return cpuid_eax(ACRN_CPUID_TIMING_INFO); +} + static void __init acrn_init_platform(void) { /* Install system interrupt handler for ACRN hypervisor callback */ sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); - - x86_platform.calibrate_tsc = acrn_get_tsc_khz; - x86_platform.calibrate_cpu = acrn_get_tsc_khz; } static bool 
acrn_x2apic_available(void) @@ -78,4 +80,6 @@ const __initconst struct hypervisor_x86 x86_hyper_acrn = { .type = X86_HYPER_ACRN, .init.init_platform = acrn_init_platform, .init.x2apic_available = acrn_x2apic_available, + .init.get_tsc_khz = acrn_get_tsc_khz, + .init.get_cpu_khz = acrn_get_tsc_khz, }; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 87beecec76f0..f9bc1c2d8c93 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -395,7 +395,7 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) } #endif -static unsigned long hv_get_tsc_khz(void) +static unsigned int __init hv_get_tsc_khz(void) { unsigned long freq; @@ -573,8 +573,8 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { - x86_platform.calibrate_tsc = hv_get_tsc_khz; - x86_platform.calibrate_cpu = hv_get_tsc_khz; + x86_init.hyper.get_tsc_khz = hv_get_tsc_khz; + x86_init.hyper.get_cpu_khz = hv_get_tsc_khz; setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); } diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 13b97265c535..3cb473cae462 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -64,7 +64,7 @@ struct vmware_steal_time { u64 reserved[7]; }; -static unsigned long vmware_tsc_khz __ro_after_init; +static unsigned long vmware_tsc_khz __initdata; static u8 vmware_hypercall_mode __ro_after_init; unsigned long vmware_hypercall_slow(unsigned long cmd, @@ -137,7 +137,7 @@ static inline int __vmware_platform(void) return eax != UINT_MAX && ebx == VMWARE_HYPERVISOR_MAGIC; } -static unsigned long vmware_get_tsc_khz(void) +static unsigned int __init vmware_get_tsc_khz(void) { return vmware_tsc_khz; } @@ -419,8 +419,8 @@ static void __init vmware_platform_setup(void) } vmware_tsc_khz = tsc_khz; - x86_platform.calibrate_tsc = vmware_get_tsc_khz; - 
x86_platform.calibrate_cpu = vmware_get_tsc_khz; + x86_init.hyper.get_tsc_khz = vmware_get_tsc_khz; + x86_init.hyper.get_cpu_khz = vmware_get_tsc_khz; /* Skip lapic calibration since we know the bus frequency. */ apic_set_timer_period_hz(ecx, "VMware hypervisor"); diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index f2d4ef89c085..e24c05ab4fae 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -68,7 +68,7 @@ static void __init jailhouse_timer_init(void) apic_set_timer_period_khz(setup_data.v1.apic_khz, "Jailhouse hypervisor"); } -static unsigned long jailhouse_get_tsc(void) +static unsigned int __init jailhouse_get_tsc(void) { return precalibrated_tsc_khz; } @@ -210,8 +210,6 @@ static void __init jailhouse_init_platform(void) x86_init.mpparse.parse_smp_cfg = jailhouse_parse_smp_config; x86_init.pci.arch_init = jailhouse_pci_arch_init; - x86_platform.calibrate_cpu = jailhouse_get_tsc; - x86_platform.calibrate_tsc = jailhouse_get_tsc; x86_platform.get_wallclock = jailhouse_get_wallclock; x86_platform.legacy.rtc = 0; x86_platform.legacy.warm_reset = 0; @@ -293,5 +291,7 @@ const struct hypervisor_x86 x86_hyper_jailhouse __refconst = { .detect = jailhouse_detect, .init.init_platform = jailhouse_init_platform, .init.x2apic_available = jailhouse_x2apic_available, + .init.get_tsc_khz = jailhouse_get_tsc, + .init.get_cpu_khz = jailhouse_get_tsc, .ignore_nopv = true, }; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index cb3d0ca1fa22..4f8299303a19 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -136,7 +136,7 @@ static inline void kvm_sched_clock_init(bool stable) * poll of guests can be running and trouble each other. 
So we preset * lpj here */ -static unsigned long kvm_get_tsc_khz(void) +static unsigned int __init kvm_get_tsc_khz(void) { setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); return pvclock_tsc_khz(this_cpu_pvti()); @@ -343,8 +343,8 @@ void __init kvmclock_init(void) flags = pvclock_read_flags(&hv_clock_boot[0].pvti); kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); - x86_platform.calibrate_tsc = kvm_get_tsc_khz; - x86_platform.calibrate_cpu = kvm_get_tsc_khz; + x86_init.hyper.get_tsc_khz = kvm_get_tsc_khz; + x86_init.hyper.get_cpu_khz = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 86384a83a5f6..1dca9464b41c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1451,13 +1451,17 @@ static int __init init_tsc_clocksource(void) device_initcall(init_tsc_clocksource); static bool __init determine_cpu_tsc_frequencies(bool early, + unsigned int known_cpu_khz, unsigned int known_tsc_khz) { /* Make sure that cpu and tsc are not already calibrated */ WARN_ON(cpu_khz || tsc_khz); if (early) { - cpu_khz = x86_platform.calibrate_cpu(); + if (known_cpu_khz) + cpu_khz = known_cpu_khz; + else + cpu_khz = x86_platform.calibrate_cpu(); if (known_tsc_khz) tsc_khz = known_tsc_khz; else @@ -1514,7 +1518,7 @@ static void __init tsc_enable_sched_clock(void) void __init tsc_early_init(void) { - unsigned int known_tsc_khz = 0; + unsigned int known_cpu_khz = 0, known_tsc_khz = 0; if (!boot_cpu_has(X86_FEATURE_TSC)) return; @@ -1522,22 +1526,33 @@ void __init tsc_early_init(void) if (is_early_uv_system()) return; + if (x86_init.hyper.get_cpu_khz) + known_cpu_khz = x86_init.hyper.get_cpu_khz(); + if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) known_tsc_khz = snp_secure_tsc_init(); else if (boot_cpu_has(X86_FEATURE_TDX_GUEST)) known_tsc_khz = tdx_tsc_init(); + /* + * If the TSC frequency wasn't provided by trusted 
firmware, try to get + * it from the hypervisor (which is untrusted when running as a CoCo guest). + */ + if (!known_tsc_khz && x86_init.hyper.get_tsc_khz) + known_tsc_khz = x86_init.hyper.get_tsc_khz(); + /* * Ignore the user-provided TSC frequency if the exact frequency was - * obtained from trusted firmware, as the user-provided frequency is - * intended as a "starting point", not a known, guaranteed frequency. + * obtained from trusted firmware or the hypervisor, as the user- + * provided frequency is intended as a "starting point", not a known, + * guaranteed frequency. */ if (!known_tsc_khz) known_tsc_khz = tsc_early_khz; else if (tsc_early_khz) - pr_err("Ignoring 'tsc_early_khz' in favor of trusted firmware.\n"); + pr_err("Ignoring 'tsc_early_khz' in favor of firmware/hypervisor.\n"); - if (!determine_cpu_tsc_frequencies(true, known_tsc_khz)) + if (!determine_cpu_tsc_frequencies(true, known_cpu_khz, known_tsc_khz)) return; tsc_enable_sched_clock(); } @@ -1558,7 +1573,7 @@ void __init tsc_init(void) if (!tsc_khz) { /* We failed to determine frequencies earlier, try again */ - if (!determine_cpu_tsc_frequencies(false, 0)) { + if (!determine_cpu_tsc_frequencies(false, 0, 0)) { mark_tsc_unstable("could not calculate TSC khz"); setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index d62c14334b35..1adb44fdddb2 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -38,7 +38,7 @@ static u64 xen_sched_clock_offset __read_mostly; /* Get the TSC speed from Xen */ -static unsigned long xen_tsc_khz(void) +static unsigned int __init xen_tsc_khz(void) { struct pvclock_vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_info[0].time; @@ -569,7 +569,7 @@ static void __init xen_init_time_common(void) static_call_update(pv_steal_clock, xen_steal_clock); paravirt_set_sched_clock(xen_sched_clock); - x86_platform.calibrate_tsc = xen_tsc_khz; + x86_init.hyper.get_tsc_khz = xen_tsc_khz; 
x86_platform.get_wallclock = xen_get_wallclock; } -- 2.55.0.rc0.799.gd6f94ed593-goog

