Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup
On Mon, Sep 19, 2016 at 01:39:13PM +0200, Paolo Bonzini wrote: > Lately tsc page was implemented but filled with empty > values. This patch setup tsc page scale and offset based > on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. > > The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr > reads count to zero which potentially improves performance. > > Signed-off-by: Andrey Smetanin > Reviewed-by: Peter Hornyack > CC: Paolo Bonzini > CC: Radim Krčmář > CC: Roman Kagan > CC: Denis V. Lunev > [Computation of TSC page parameters rewritten to use the Linux timekeeper > parameters. - Paolo] > Signed-off-by: Paolo Bonzini > --- > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/hyperv.c | 162 > > arch/x86/kvm/hyperv.h | 3 + > arch/x86/kvm/x86.c | 8 +- > 4 files changed, 155 insertions(+), 20 deletions(-) Reviewed-by: Roman Kagan
Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup
On Mon, Sep 19, 2016 at 01:39:13PM +0200, Paolo Bonzini wrote: > Lately tsc page was implemented but filled with empty > values. This patch setup tsc page scale and offset based > on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. > > The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr > reads count to zero which potentially improves performance. > > Signed-off-by: Andrey Smetanin > Reviewed-by: Peter Hornyack > CC: Paolo Bonzini > CC: Radim Krčmář > CC: Roman Kagan > CC: Denis V. Lunev > [Computation of TSC page parameters rewritten to use the Linux timekeeper > parameters. - Paolo] > Signed-off-by: Paolo Bonzini > --- > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/hyperv.c | 162 > > arch/x86/kvm/hyperv.h | 3 + > arch/x86/kvm/x86.c | 8 +- > 4 files changed, 155 insertions(+), 20 deletions(-) Reviewed-by: Roman Kagan
[PATCH 4/4] KVM: x86: Hyper-V tsc page setup
Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. Signed-off-by: Andrey Smetanin Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Radim Krčmář CC: Roman Kagan CC: Denis V. Lunev [Computation of TSC page parameters rewritten to use the Linux timekeeper parameters. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/hyperv.c | 162 arch/x86/kvm/hyperv.h | 3 + arch/x86/kvm/x86.c | 8 +- 4 files changed, 155 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 32a43a25d415..4b20f7304b9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -702,6 +702,8 @@ struct kvm_hv { /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; + + HV_REFERENCE_TSC_PAGE tsc_ref; }; struct kvm_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed5b77f39ffb..555951625350 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -386,7 +386,21 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic) static u64 get_time_ref_counter(struct kvm *kvm) { - return div_u64(get_kvmclock_ns(kvm), 100); + struct kvm_hv *hv = &kvm->arch.hyperv; + struct kvm_vcpu *vcpu; + u64 tsc; + + /* +* The guest has not set up the TSC page or the clock isn't +* stable, fall back to get_kvmclock_ns. 
+*/ + if (!hv->tsc_ref.tsc_sequence) + return div_u64(get_kvmclock_ns(kvm), 100); + + vcpu = kvm_get_vcpu(kvm, 0); + tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) + + hv->tsc_ref.tsc_offset; } static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, @@ -756,6 +774,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +/* + * The kvmclock and Hyper-V TSC page use similar formulas, and converting + * between them is possible: + * + * kvmclock formula: + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * + * Hyper-V formula: + *nsec/100 = ticks * scale / 2^64 + offset + * + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. + * By dividing the kvmclock formula by 100 and equating what's left we get: + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100 + * + * Now expand the kvmclock formula and divide by 100: + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * + system_time / 100 + * + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: + *nsec/100 = ticks * scale / 2^64 + * - tsc_timestamp * scale / 2^64 + * + system_time / 100 + * + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 + * + * These two equivalencies are implemented in this function. 
+ */ +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, + HV_REFERENCE_TSC_PAGE *tsc_ref) +{ + u64 max_mul; + + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) + return false; + + /* +* check if scale would overflow, if so we use the time ref counter +*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 +*tsc_to_system_mul / 100 >= 2^(32-tsc_shift) +*tsc_to_system_mul >= 100 * 2^(32-tsc_shift) +*/ + max_mul = 100ull << (32 - hv_clock->tsc_shift); + if (hv_clock->tsc_to_system_mul >= max_mul) + return false; + + /* +* Otherwise compute the scale and offset according to the formulas +* derived above. +*/ + tsc_ref->tsc_scale = + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), + hv_clock->tsc_to_system_mul, + 100); +
[PATCH 4/4] KVM: x86: Hyper-V tsc page setup
Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. Signed-off-by: Andrey Smetanin Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Radim Krčmář CC: Roman Kagan CC: Denis V. Lunev [Computation of TSC page parameters rewritten to use the Linux timekeeper parameters. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/hyperv.c | 162 arch/x86/kvm/hyperv.h | 3 + arch/x86/kvm/x86.c | 8 +- 4 files changed, 155 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 32a43a25d415..4b20f7304b9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -702,6 +702,8 @@ struct kvm_hv { /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; + + HV_REFERENCE_TSC_PAGE tsc_ref; }; struct kvm_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed5b77f39ffb..555951625350 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -386,7 +386,21 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic) static u64 get_time_ref_counter(struct kvm *kvm) { - return div_u64(get_kvmclock_ns(kvm), 100); + struct kvm_hv *hv = &kvm->arch.hyperv; + struct kvm_vcpu *vcpu; + u64 tsc; + + /* +* The guest has not set up the TSC page or the clock isn't +* stable, fall back to get_kvmclock_ns. 
+*/ + if (!hv->tsc_ref.tsc_sequence) + return div_u64(get_kvmclock_ns(kvm), 100); + + vcpu = kvm_get_vcpu(kvm, 0); + tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64) + + hv->tsc_ref.tsc_offset; } static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, @@ -756,6 +774,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +/* + * The kvmclock and Hyper-V TSC page use similar formulas, and converting + * between them is possible: + * + * kvmclock formula: + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * + * Hyper-V formula: + *nsec/100 = ticks * scale / 2^64 + offset + * + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. + * By dividing the kvmclock formula by 100 and equating what's left we get: + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100 + * + * Now expand the kvmclock formula and divide by 100: + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * + system_time / 100 + * + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: + *nsec/100 = ticks * scale / 2^64 + * - tsc_timestamp * scale / 2^64 + * + system_time / 100 + * + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 + * + * These two equivalencies are implemented in this function. 
+ */ +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, + HV_REFERENCE_TSC_PAGE *tsc_ref) +{ + u64 max_mul; + + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) + return false; + + /* +* check if scale would overflow, if so we use the time ref counter +*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 +*tsc_to_system_mul / 100 >= 2^(32-tsc_shift) +*tsc_to_system_mul >= 100 * 2^(32-tsc_shift) +*/ + max_mul = 100ull << (32 - hv_clock->tsc_shift); + if (hv_clock->tsc_to_system_mul >= max_mul) + return false; + + /* +* Otherwise compute the scale and offset according to the formulas +* derived above. +*/ + tsc_ref->tsc_scale = + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), + hv_clock->tsc_to_system_mul, + 100); + + tsc_ref->tsc_offset = hv_clock->system_time; + do_div(tsc_ref->tsc_offset, 100); + tsc_ref->tsc_offset -= +
Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup
On Thu, Sep 01, 2016 at 05:26:15PM +0200, Paolo Bonzini wrote: > Lately tsc page was implemented but filled with empty > values. This patch setup tsc page scale and offset based > on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. > > The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr > reads count to zero which potentially improves performance. > > Signed-off-by: Andrey Smetanin > Reviewed-by: Peter Hornyack > CC: Paolo Bonzini > CC: Radim Krčmář > CC: Roman Kagan > CC: Denis V. Lunev > [Computation of TSC page parameters rewritten to use the Linux timekeeper > parameters. - Paolo] > Signed-off-by: Paolo Bonzini > --- > arch/x86/kvm/hyperv.c | 141 > -- > arch/x86/kvm/hyperv.h | 3 ++ > arch/x86/kvm/x86.c| 8 +-- > 3 files changed, 133 insertions(+), 19 deletions(-) > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index ed5b77f39ffb..e089d1f52dc0 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu > *vcpu, > return 0; > } > > +/* > + * The kvmclock and Hyper-V TSC page use similar formulas. Because the KVM > + * one is more precise, it is a little more complex. However, converting I'm not sure you're right regarding which one is more precise :) Hyper-V uses a right shift of 64 which is higher precision than typical kvmclock shift of around 22. > + * between them is possible: > + * > + * kvmclock formula: > + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) > + * + system_time > + * > + * Hyper-V formula: > + *nsec/100 = ticks * scale / 2^64 + offset > + * > + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V > formula. 
> + * By dividing the kvmclock formula by 100 and equating what's left we get: > + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / > 100 > + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / > 100 > + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / > 100 > + * > + * Now expand the kvmclock formula and divide by 100: > + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) > + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) > + * + system_time > + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 > + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 > + * + system_time / 100 > + * > + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: > + *nsec/100 = ticks * scale / 2^64 > + * - tsc_timestamp * scale / 2^64 > + * + system_time / 100 > + * > + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: > + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 > + * > + * These two equivalencies are implemented in this function. > + */ > +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info > *hv_clock, > + HV_REFERENCE_TSC_PAGE *tsc_ref) > +{ > + u64 max_mul; > + > + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) > + return false; > + > + /* > + * check if scale would overflow, if so we use the time ref counter > + *tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 > + *tsc_to_system_mul / 100 >= 2^(32-tsc_shift) > + *tsc_to_system_mul >= 100 * 2^(32-tsc_shift) > + */ > + max_mul = 100ull << (32 - hv_clock->tsc_shift); > + if (hv_clock->tsc_to_system_mul >= max_mul) > + return false; > + > + /* > + * Otherwise compute the scale and offset according to the formulas > + * derived above. 
> + */ > + tsc_ref->tsc_scale = > + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), > + hv_clock->tsc_to_system_mul, > + 100); > + > + tsc_ref->tsc_offset = hv_clock->system_time; > + do_div(tsc_ref->tsc_offset, 100); > + tsc_ref->tsc_offset -= > + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, > 64); > + return true; Although this is correct, we may want to make it slightly easier to follow: note that the hv_clock contents is essentially populated using vcpu->hw_tsc_khz and vcpu->last_guest_tsc, so at this point we can just directly calculate ->tsc_scale and ->tsc_offset from them. If we also stash them somewhere on vcpu we can make the reference counter use exactly the same procedure as the guest would with tsc page, and guarantee against precision errors. Dunno if that really matters much, though. > +} > + > +void kvm_hv_setup_tsc_page(struct kvm *kvm, > +
Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup
On Thu, Sep 01, 2016 at 05:26:15PM +0200, Paolo Bonzini wrote: > Lately tsc page was implemented but filled with empty > values. This patch setup tsc page scale and offset based > on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. > > The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr > reads count to zero which potentially improves performance. > > Signed-off-by: Andrey Smetanin > Reviewed-by: Peter Hornyack > CC: Paolo Bonzini > CC: Radim Krčmář > CC: Roman Kagan > CC: Denis V. Lunev > [Computation of TSC page parameters rewritten to use the Linux timekeeper > parameters. - Paolo] > Signed-off-by: Paolo Bonzini > --- > arch/x86/kvm/hyperv.c | 141 > -- > arch/x86/kvm/hyperv.h | 3 ++ > arch/x86/kvm/x86.c| 8 +-- > 3 files changed, 133 insertions(+), 19 deletions(-) > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index ed5b77f39ffb..e089d1f52dc0 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu > *vcpu, > return 0; > } > > +/* > + * The kvmclock and Hyper-V TSC page use similar formulas. Because the KVM > + * one is more precise, it is a little more complex. However, converting I'm not sure you're right regarding which one is more precise :) Hyper-V uses a right shift of 64 which is higher precision than typical kvmclock shift of around 22. > + * between them is possible: > + * > + * kvmclock formula: > + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) > + * + system_time > + * > + * Hyper-V formula: > + *nsec/100 = ticks * scale / 2^64 + offset > + * > + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V > formula. 
> + * By dividing the kvmclock formula by 100 and equating what's left we get: > + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / > 100 > + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / > 100 > + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / > 100 > + * > + * Now expand the kvmclock formula and divide by 100: > + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) > + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) > + * + system_time > + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 > + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 > + * + system_time / 100 > + * > + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: > + *nsec/100 = ticks * scale / 2^64 > + * - tsc_timestamp * scale / 2^64 > + * + system_time / 100 > + * > + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: > + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 > + * > + * These two equivalencies are implemented in this function. > + */ > +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info > *hv_clock, > + HV_REFERENCE_TSC_PAGE *tsc_ref) > +{ > + u64 max_mul; > + > + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) > + return false; > + > + /* > + * check if scale would overflow, if so we use the time ref counter > + *tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 > + *tsc_to_system_mul / 100 >= 2^(32-tsc_shift) > + *tsc_to_system_mul >= 100 * 2^(32-tsc_shift) > + */ > + max_mul = 100ull << (32 - hv_clock->tsc_shift); > + if (hv_clock->tsc_to_system_mul >= max_mul) > + return false; > + > + /* > + * Otherwise compute the scale and offset according to the formulas > + * derived above. 
> + */ > + tsc_ref->tsc_scale = > + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), > + hv_clock->tsc_to_system_mul, > + 100); > + > + tsc_ref->tsc_offset = hv_clock->system_time; > + do_div(tsc_ref->tsc_offset, 100); > + tsc_ref->tsc_offset -= > + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, > 64); > + return true; Although this is correct, we may want to make it slightly easier to follow: note that the hv_clock contents is essentially populated using vcpu->hw_tsc_khz and vcpu->last_guest_tsc, so at this point we can just directly calculate ->tsc_scale and ->tsc_offset from them. If we also stash them somewhere on vcpu we can make the reference counter use exactly the same procedure as the guest would with tsc page, and guarantee against precision errors. Dunno if that really matters much, though. > +} > + > +void kvm_hv_setup_tsc_page(struct kvm *kvm, > +struct pvclock_vcpu_time_info *hv_clock) > +{ > + struct kvm_hv *hv = >arch.hyperv; > + HV_REFERENCE_TSC_PAGE tsc_ref = { 0 }; > + u32
[PATCH 4/4] KVM: x86: Hyper-V tsc page setup
Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. Signed-off-by: Andrey Smetanin Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Radim Krčmář CC: Roman Kagan CC: Denis V. Lunev [Computation of TSC page parameters rewritten to use the Linux timekeeper parameters. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 141 -- arch/x86/kvm/hyperv.h | 3 ++ arch/x86/kvm/x86.c| 8 +-- 3 files changed, 133 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed5b77f39ffb..e089d1f52dc0 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +/* + * The kvmclock and Hyper-V TSC page use similar formulas. Because the KVM + * one is more precise, it is a little more complex. However, converting + * between them is possible: + * + * kvmclock formula: + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * + * Hyper-V formula: + *nsec/100 = ticks * scale / 2^64 + offset + * + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. 
+ * By dividing the kvmclock formula by 100 and equating what's left we get: + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100 + * + * Now expand the kvmclock formula and divide by 100: + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * + system_time / 100 + * + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: + *nsec/100 = ticks * scale / 2^64 + * - tsc_timestamp * scale / 2^64 + * + system_time / 100 + * + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 + * + * These two equivalencies are implemented in this function. + */ +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, + HV_REFERENCE_TSC_PAGE *tsc_ref) +{ + u64 max_mul; + + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) + return false; + + /* +* check if scale would overflow, if so we use the time ref counter +*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 +*tsc_to_system_mul / 100 >= 2^(32-tsc_shift) +*tsc_to_system_mul >= 100 * 2^(32-tsc_shift) +*/ + max_mul = 100ull << (32 - hv_clock->tsc_shift); + if (hv_clock->tsc_to_system_mul >= max_mul) + return false; + + /* +* Otherwise compute the scale and offset according to the formulas +* derived above. 
+*/ + tsc_ref->tsc_scale = + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), + hv_clock->tsc_to_system_mul, + 100); + + tsc_ref->tsc_offset = hv_clock->system_time; + do_div(tsc_ref->tsc_offset, 100); + tsc_ref->tsc_offset -= + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); + return true; +} + +void kvm_hv_setup_tsc_page(struct kvm *kvm, + struct pvclock_vcpu_time_info *hv_clock) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + HV_REFERENCE_TSC_PAGE tsc_ref = { 0 }; + u32 tsc_seq; + u64 gfn; + + BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(tsc_ref.tsc_sequence)); + BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); + + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + return; + + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + /* +* Because the TSC parameters only vary when there is a +* change in the master clock, do not bother with caching. +*/ + if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), + &tsc_seq, sizeof(tsc_seq)))) + return; + + /* +* While we're computing and writing the parameters, force the +* guest to use the time
[PATCH 4/4] KVM: x86: Hyper-V tsc page setup
Lately tsc page was implemented but filled with empty values. This patch setup tsc page scale and offset based on vcpu tsc, tsc_khz and HV_X64_MSR_TIME_REF_COUNT value. The valid tsc page drops HV_X64_MSR_TIME_REF_COUNT msr reads count to zero which potentially improves performance. Signed-off-by: Andrey Smetanin Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Radim Krčmář CC: Roman Kagan CC: Denis V. Lunev [Computation of TSC page parameters rewritten to use the Linux timekeeper parameters. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 141 -- arch/x86/kvm/hyperv.h | 3 ++ arch/x86/kvm/x86.c| 8 +-- 3 files changed, 133 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ed5b77f39ffb..e089d1f52dc0 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, return 0; } +/* + * The kvmclock and Hyper-V TSC page use similar formulas. Because the KVM + * one is more precise, it is a little more complex. However, converting + * between them is possible: + * + * kvmclock formula: + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + * + * Hyper-V formula: + *nsec/100 = ticks * scale / 2^64 + offset + * + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula. 
+ * By dividing the kvmclock formula by 100 and equating what's left we get: + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100 + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100 + * + * Now expand the kvmclock formula and divide by 100: + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32) + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) + * + system_time + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100 + * + system_time / 100 + * + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64: + *nsec/100 = ticks * scale / 2^64 + * - tsc_timestamp * scale / 2^64 + * + system_time / 100 + * + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out: + *offset = system_time / 100 - tsc_timestamp * scale / 2^64 + * + * These two equivalencies are implemented in this function. + */ +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, + HV_REFERENCE_TSC_PAGE *tsc_ref) +{ + u64 max_mul; + + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT)) + return false; + + /* +* check if scale would overflow, if so we use the time ref counter +*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64 +*tsc_to_system_mul / 100 >= 2^(32-tsc_shift) +*tsc_to_system_mul >= 100 * 2^(32-tsc_shift) +*/ + max_mul = 100ull << (32 - hv_clock->tsc_shift); + if (hv_clock->tsc_to_system_mul >= max_mul) + return false; + + /* +* Otherwise compute the scale and offset according to the formulas +* derived above. 
+*/ + tsc_ref->tsc_scale = + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift), + hv_clock->tsc_to_system_mul, + 100); + + tsc_ref->tsc_offset = hv_clock->system_time; + do_div(tsc_ref->tsc_offset, 100); + tsc_ref->tsc_offset -= + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64); + return true; +} + +void kvm_hv_setup_tsc_page(struct kvm *kvm, + struct pvclock_vcpu_time_info *hv_clock) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + HV_REFERENCE_TSC_PAGE tsc_ref = { 0 }; + u32 tsc_seq; + u64 gfn; + + BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(tsc_ref.tsc_sequence)); + BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); + + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + return; + + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + /* +* Because the TSC parameters only vary when there is a +* change in the master clock, do not bother with caching. +*/ + if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), + &tsc_seq, sizeof(tsc_seq)))) + return; + + /* +* While we're computing and writing the parameters, force the +* guest to use the time reference count MSR. +*/ + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + &tsc_ref, sizeof(tsc_ref.tsc_sequence))) +