Marcelo Tosatti wrote:
> The boot TSC sync check is failing on recent Linux SMP guests on TSC
> stable hosts.
>
>   

What about tsc unstable hosts?  If your patch convinces the guest its 
tsc is table, while the host tsc is not, then it may cause confusion 
later on.

> Following patch attempts to address the problems, which are:
>
> 1) APIC_DM_STARTUP, which is only called for vcpu's other than vcpu0,
> will trigger ->vcpu_reset(), setting the TSC to 0. Fix that by moving
> the guest_write_tsc(0) to vcpu_load.
>
> 2) vcpu's are initialized at different times by QEMU (vcpu0 init happens
> way earlier than the rest). Fix that by initializing the TSC of vcpu's >
> 0 with reference to vcpu0 init tsc value. This way TSC synchronization
> is kept (apparently Xen does something similar).
>
> 3) The code which adjusts the TSC of a VCPU on physical CPU switch is
> meant to guarantee that the guest sees a monotonically increasing value.
> However there is a large gap, in terms of clocks, between the time which
> the source CPU TSC is read and the time the destination CPU TSC is read.
> So move those two reads to happen at vcpu_clear.
>
> I believe that 3) is the reason for the -no-kvm-irqchip problems
> reported by Avi on RHEL5, with kernels < 2.6.21 (where vcpu->cpu pinning
> would fix the problem). Unfortunately I could reproduce that problem.
>
> 4-way guest with constant tick at 250Hz now reliably sees the TSC's
> synchronized, and idle guest CPU consumption is reduced by 50% (3-4%
> instead of 8%, the latter with acpi_pm_good boot parameter).
>
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 4741806..e1c8cf4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -47,6 +47,8 @@ struct vcpu_vmx {
>       struct kvm_vcpu       vcpu;
>       int                   launched;
>       u8                    fail;
> +     u64                   first_tsc;
> +     u64                   tsc_this;
>       u32                   idt_vectoring_info;
>       struct kvm_msr_entry *guest_msrs;
>       struct kvm_msr_entry *host_msrs;
> @@ -254,6 +256,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
>       if (vmx->vcpu.cpu == -1)
>               return;
>       smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
> +     rdtscll(vmx->tsc_this);
>       vmx->launched = 0;
>  }
>  
> @@ -480,6 +483,8 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
>       reload_host_efer(vmx);
>  }
>  
> +static void guest_write_tsc(u64 host_tsc, u64 guest_tsc);
> +
>  /*
>   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>   * vcpu mutex is already taken.
> @@ -488,7 +493,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  {
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>       u64 phys_addr = __pa(vmx->vmcs);
> -     u64 tsc_this, delta;
> +     u64 delta;
>  
>       if (vcpu->cpu != cpu) {
>               vcpu_clear(vmx);
> @@ -511,6 +516,19 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>               struct descriptor_table dt;
>               unsigned long sysenter_esp;
>  
> +             if (unlikely(vcpu->cpu == -1)) {
>   

This can happen after migration, I believe.

> +                     rdtscll(vcpu->arch.host_tsc);
> +                     vmx->tsc_this = vcpu->arch.host_tsc;
> +                     if (vcpu->vcpu_id == 0) {
> +                             guest_write_tsc(vcpu->arch.host_tsc, 0);
> +                             vmx->first_tsc = vcpu->arch.host_tsc;
> +                     } else {
> +                             struct vcpu_vmx *cpu0;
> +                             cpu0 = to_vmx(vcpu->kvm->vcpus[0]);
> +                             guest_write_tsc(cpu0->first_tsc, 0);
> +                     }
> +             }
> +
>   

Depending on vcpu_id == 0 can cause problems (for example, if vcpu 0 is 
somehow not the first to run).

We might initialize the tsc base on vm creation, and if the host tsc is 
synced, then the guest tsc should also be.

>               vcpu->cpu = cpu;
>               /*
>                * Linux uses per-cpu TSS and GDT, so set these when switching
> @@ -526,8 +544,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>               /*
>                * Make sure the time stamp counter is monotonous.
>                */
> -             rdtscll(tsc_this);
> -             delta = vcpu->arch.host_tsc - tsc_this;
> +             delta = vcpu->arch.host_tsc - vmx->tsc_this;
>               vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
>   

This is a little roundabout, how about moving the delta calculation 
immediately after the call to vcpu_clear()?

I don't think this is the cause of the problem, it can't account for 
more than a few hundred cycles, compared to the much greater vmentry cost.

Anyway it should be in a separate patch.



-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to