Marcelo Tosatti wrote:
> Hi Avi,
>
> On Sun, Jan 13, 2008 at 02:19:29PM +0200, Avi Kivity wrote:
>   
>> Marcelo Tosatti wrote:
>>     
>>> The boot TSC sync check is failing on recent Linux SMP guests on TSC
>>> stable hosts.
>>>
>>>  
>>>       
>> What about tsc unstable hosts?  If your patch convinces the guest its 
>> tsc is stable, while the host tsc is not, then it may cause confusion 
>> later on.
>>     
>
> The adjustment to zero won't fool the guest because it assumes that the
> TSC's are synchronized. It will simply set the guest TSC to zero on all
> VCPUs based on the time VCPU0 was initialized.
>
> That is, setting -(vcpu[0].first_tsc) on all VCPU's will not correct any
> synchronization problem.
>
>   

What I mean is, right now we present really broken tscs to the guest.  
After your patch, we present less-broken tscs (at boot, they will 
closely resemble stable tscs).  But after the machine idles a bit and 
cpufreq takes over, the tscs won't be stable any more.

>>> +static void guest_write_tsc(u64 host_tsc, u64 guest_tsc);
>>> +
>>> /*
>>>  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>>>  * vcpu mutex is already taken.
>>> @@ -488,7 +493,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
>>> cpu)
>>> {
>>>     struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>     u64 phys_addr = __pa(vmx->vmcs);
>>> -   u64 tsc_this, delta;
>>> +   u64 delta;
>>>
>>>     if (vcpu->cpu != cpu) {
>>>             vcpu_clear(vmx);
>>> @@ -511,6 +516,19 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
>>> cpu)
>>>             struct descriptor_table dt;
>>>             unsigned long sysenter_esp;
>>>
>>> +           if (unlikely(vcpu->cpu == -1)) {
>>>  
>>>       
>> This can happen after migration, I believe.
>>     
>
> Right now the destination host will do ->vcpu_reset() on all VCPU's on
> startup... so its already broken. This patch is not introducing any
> issues, just changing where it happens :)
>   

No, after the reset qemu will set the guest msrs including the tsc msr, 
overriding the reset value.

However, by that time vcpu->cpu will no longer be -1, so there's no 
issue there.

On the other hand, it may happen during cpu hotunplug (see 
decache_vcpus_on_vcpu).

>
>   
>>> +                   rdtscll(vcpu->arch.host_tsc);
>>> +                   vmx->tsc_this = vcpu->arch.host_tsc;
>>> +                   if (vcpu->vcpu_id == 0) {
>>> +                           guest_write_tsc(vcpu->arch.host_tsc, 0);
>>> +                           vmx->first_tsc = vcpu->arch.host_tsc;
>>> +                   } else {
>>> +                           struct vcpu_vmx *cpu0;
>>> +                           cpu0 = to_vmx(vcpu->kvm->vcpus[0]);
>>> +                           guest_write_tsc(cpu0->first_tsc, 0);
>>> +                   }
>>> +           }
>>> +
>>>  
>>>       
>> Depending on vcpu_id == 0 can cause problems (for example, if vcpu 0 is 
>> somehow not the first to run).
>>     
>
> But QEMU will always run kvm_create() first (which does
> kvm_create_vcpu(0)), then start the other threads later. So I don't see
> how that can happen.
>
>   

We're not developing purely for qemu (there's xenner, for example) and I 
don't want to embed hidden assumptions into the API.


>>>             vcpu->cpu = cpu;
>>>             /*
>>>              * Linux uses per-cpu TSS and GDT, so set these when 
>>>              switching
>>> @@ -526,8 +544,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int 
>>> cpu)
>>>             /*
>>>              * Make sure the time stamp counter is monotonous.
>>>              */
>>> -           rdtscll(tsc_this);
>>> -           delta = vcpu->arch.host_tsc - tsc_this;
>>> +           delta = vcpu->arch.host_tsc - vmx->tsc_this;
>>>             vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
>>>  
>>>       
>> This is a little roundabout, how about moving the delta calculation 
>> immediately after the call to vcpu_clear()?
>>
>> I don't think this is the cause of the problem, it can't account for 
>> more than a few hundred cycles, compared to the much greater vmentry cost.
>>     
>
> Actually it accounts for several thousand cycles warp by the time the
> kernel checks the TSC synchronization between CPU's, which happens very
> early on boot.
>
> You saw the hang on userspace init, by then there could be many
> VCPU->CPU switches.
>
>   

Okay.

>> Anyway it should be in a separate patch.
>>     
>
> How does the following look (minus proper changelog):
>
>
> Index: kvm.quilt/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm.quilt.orig/arch/x86/kvm/vmx.c
> +++ kvm.quilt/arch/x86/kvm/vmx.c
> @@ -47,6 +47,7 @@ struct vcpu_vmx {
>       struct kvm_vcpu       vcpu;
>       int                   launched;
>       u8                    fail;
> +     u64                   first_tsc;
>       u32                   idt_vectoring_info;
>       struct kvm_msr_entry *guest_msrs;
>       struct kvm_msr_entry *host_msrs;
> @@ -480,6 +481,7 @@ static void vmx_load_host_state(struct v
>       reload_host_efer(vmx);
>  }
>  
> +static void guest_write_tsc(u64 host_tsc, u64 guest_tsc);
>  /*
>   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>   * vcpu mutex is already taken.
> @@ -511,6 +513,18 @@ static void vmx_vcpu_load(struct kvm_vcp
>               struct descriptor_table dt;
>               unsigned long sysenter_esp;
>  
> +             if (unlikely(vcpu->cpu == -1)) {
>   
> +                     rdtscll(vcpu->arch.host_tsc);
> +                     if (vcpu->vcpu_id == 0) {
> +                             guest_write_tsc(vcpu->arch.host_tsc, 0);
> +                             vmx->first_tsc = vcpu->arch.host_tsc;
> +                     } else {
> +                             struct vcpu_vmx *cpu0;
> +                             cpu0 = to_vmx(vcpu->kvm->vcpus[0]);
> +                             guest_write_tsc(cpu0->first_tsc, 0);
> +                     }
> +             }
> +
>   

I don't like the dependency on vcpu 0.  Also see comments above.

>  
> +static void __vcpu_clear(void *arg)
> +{
> +     struct vcpu_vmx *vmx = arg;
> +     int cpu = raw_smp_processor_id();
> +
> +     if (vmx->vcpu.cpu == cpu)
> +             vmcs_clear(vmx->vmcs);
> +     if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
> +             per_cpu(current_vmcs, cpu) = NULL;
> +     rdtscll(vmx->vcpu.arch.host_tsc);
> +}
> +
> +static void vcpu_clear(struct vcpu_vmx *vmx)
> +{
> +     u64 tsc_this, delta;
> +
> +     if (vmx->vcpu.cpu == -1)
> +             return;
> +     smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
> +     /*
> +      * Make sure the time stamp counter is monotonous.
> +      */
> +     rdtscll(tsc_this);
> +     delta = vmx->vcpu.arch.host_tsc - tsc_this;
> +     vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
> +     vmx->launched = 0;
> +}
>   

This part is fine.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to