The boot TSC sync check is failing on recent Linux SMP guests on TSC stable hosts.
Following patch attempts to address the problems, which are: 1) APIC_DM_STARTUP, which is only called for vcpu's other than vcpu0, will trigger ->vcpu_reset(), setting the TSC to 0. Fix that by moving the guest_write_tsc(0) to vcpu_load. 2) vcpu's are initialized at different times by QEMU (vcpu0 init happens way earlier than the rest). Fix that by initializing the TSC of vcpu's > 0 with reference to vcpu0 init tsc value. This way TSC synchronization is kept (apparently Xen does something similar). 3) The code which adjusts the TSC of a VCPU on physical CPU switch is meant to guarantee that the guest sees a monotonically increasing value. However there is a large gap, in terms of clocks, between the time which the source CPU TSC is read and the time the destination CPU TSC is read. So move those two reads to happen at vcpu_clear. I believe that 3) is the reason for the -no-kvm-irqchip problems reported by Avi on RHEL5, with kernels < 2.6.21 (where vcpu->cpu pinning would fix the problem). Unfortunately I could reproduce that problem. 4-way guest with constant tick at 250Hz now reliably sees the TSC's synchronized, and idle guest CPU consumption is reduced by 50% (3-4% instead of 8%, the latter with acpi_pm_good boot parameter). diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4741806..e1c8cf4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -47,6 +47,8 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; int launched; u8 fail; + u64 first_tsc; + u64 tsc_this; u32 idt_vectoring_info; struct kvm_msr_entry *guest_msrs; struct kvm_msr_entry *host_msrs; @@ -254,6 +256,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx) if (vmx->vcpu.cpu == -1) return; smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1); + rdtscll(vmx->tsc_this); vmx->launched = 0; } @@ -480,6 +483,8 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) reload_host_efer(vmx); } +static void guest_write_tsc(u64 host_tsc, u64 guest_tsc); + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -488,7 +493,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(vmx->vmcs); - u64 tsc_this, delta; + u64 delta; if (vcpu->cpu != cpu) { vcpu_clear(vmx); @@ -511,6 +516,19 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) struct descriptor_table dt; unsigned long sysenter_esp; + if (unlikely(vcpu->cpu == -1)) { + rdtscll(vcpu->arch.host_tsc); + vmx->tsc_this = vcpu->arch.host_tsc; + if (vcpu->vcpu_id == 0) { + guest_write_tsc(vcpu->arch.host_tsc, 0); + vmx->first_tsc = vcpu->arch.host_tsc; + } else { + struct vcpu_vmx *cpu0; + cpu0 = to_vmx(vcpu->kvm->vcpus[0]); + guest_write_tsc(cpu0->first_tsc, 0); + } + } + vcpu->cpu = cpu; /* * Linux uses per-cpu TSS and GDT, so set these when switching @@ -526,8 +544,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* * Make sure the time stamp counter is monotonous. */ - rdtscll(tsc_this); - delta = vcpu->arch.host_tsc - tsc_this; + delta = vcpu->arch.host_tsc - vmx->tsc_this; vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta); } } @@ -690,11 +707,8 @@ static u64 guest_read_tsc(void) * writes 'guest_tsc' into guest's timestamp counter "register" * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc */ -static void guest_write_tsc(u64 guest_tsc) +static void guest_write_tsc(u64 host_tsc, u64 guest_tsc) { - u64 host_tsc; - - rdtscll(host_tsc); vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); } @@ -758,6 +772,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr; + u64 host_tsc; int ret = 0; switch (msr_index) { @@ -786,7 +801,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_TIME_STAMP_COUNTER: - guest_write_tsc(data); + rdtscll(host_tsc); + guest_write_tsc(host_tsc, data); break; default: msr = find_msr_entry(vmx, msr_index); @@ -1684,7 +1700,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); - guest_write_tsc(0); /* Special registers */ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); ------------------------------------------------------------------------- Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel