Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-19 Thread Roman Kagan
On Mon, Sep 19, 2016 at 01:39:13PM +0200, Paolo Bonzini wrote:
> The TSC page was implemented recently but is filled with empty
> values. This patch sets up the TSC page scale and offset based
> on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.
> 
> A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
> reads to zero, which potentially improves performance.
> 
> Signed-off-by: Andrey Smetanin 
> Reviewed-by: Peter Hornyack 
> CC: Paolo Bonzini 
> CC: Radim Krčmář 
> CC: Roman Kagan 
> CC: Denis V. Lunev 
> [Computation of TSC page parameters rewritten to use the Linux timekeeper
>  parameters. - Paolo]
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/include/asm/kvm_host.h |   2 +
>  arch/x86/kvm/hyperv.c   | 162 
> 
>  arch/x86/kvm/hyperv.h   |   3 +
>  arch/x86/kvm/x86.c  |   8 +-
>  4 files changed, 155 insertions(+), 20 deletions(-)

Reviewed-by: Roman Kagan 


Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-19 Thread Roman Kagan
On Mon, Sep 19, 2016 at 01:39:13PM +0200, Paolo Bonzini wrote:
> The TSC page was implemented recently but is filled with empty
> values. This patch sets up the TSC page scale and offset based
> on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.
> 
> A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
> reads to zero, which potentially improves performance.
> 
> Signed-off-by: Andrey Smetanin 
> Reviewed-by: Peter Hornyack 
> CC: Paolo Bonzini 
> CC: Radim Krčmář 
> CC: Roman Kagan 
> CC: Denis V. Lunev 
> [Computation of TSC page parameters rewritten to use the Linux timekeeper
>  parameters. - Paolo]
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/include/asm/kvm_host.h |   2 +
>  arch/x86/kvm/hyperv.c   | 162 
> 
>  arch/x86/kvm/hyperv.h   |   3 +
>  arch/x86/kvm/x86.c  |   8 +-
>  4 files changed, 155 insertions(+), 20 deletions(-)

Reviewed-by: Roman Kagan 


[PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-19 Thread Paolo Bonzini
The TSC page was implemented recently but is filled with empty
values. This patch sets up the TSC page scale and offset based
on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.

A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
reads to zero, which potentially improves performance.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Peter Hornyack 
CC: Paolo Bonzini 
CC: Radim Krčmář 
CC: Roman Kagan 
CC: Denis V. Lunev 
[Computation of TSC page parameters rewritten to use the Linux timekeeper
 parameters. - Paolo]
Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/asm/kvm_host.h |   2 +
 arch/x86/kvm/hyperv.c   | 162 
 arch/x86/kvm/hyperv.h   |   3 +
 arch/x86/kvm/x86.c  |   8 +-
 4 files changed, 155 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32a43a25d415..4b20f7304b9c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -702,6 +702,8 @@ struct kvm_hv {
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
+
+   HV_REFERENCE_TSC_PAGE tsc_ref;
 };
 
 struct kvm_arch {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ed5b77f39ffb..555951625350 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -386,7 +386,21 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic)
 
 static u64 get_time_ref_counter(struct kvm *kvm)
 {
-   return div_u64(get_kvmclock_ns(kvm), 100);
+   struct kvm_hv *hv = &kvm->arch.hyperv;
+   struct kvm_vcpu *vcpu;
+   u64 tsc;
+
+   /*
+* The guest has not set up the TSC page or the clock isn't
+* stable, fall back to get_kvmclock_ns.
+*/
+   if (!hv->tsc_ref.tsc_sequence)
+   return div_u64(get_kvmclock_ns(kvm), 100);
+
+   vcpu = kvm_get_vcpu(kvm, 0);
+   tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+   return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
+   + hv->tsc_ref.tsc_offset;
 }
 
 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
@@ -756,6 +774,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+/*
+ * The kvmclock and Hyper-V TSC page use similar formulas, and converting
+ * between them is possible:
+ *
+ * kvmclock formula:
+ *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *
+ * Hyper-V formula:
+ *nsec/100 = ticks * scale / 2^64 + offset
+ *
+ * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
+ * By dividing the kvmclock formula by 100 and equating what's left we get:
+ *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100
+ *
+ * Now expand the kvmclock formula and divide by 100:
+ *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   + system_time / 100
+ *
+ * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
+ *nsec/100 = ticks * scale / 2^64
+ *   - tsc_timestamp * scale / 2^64
+ *   + system_time / 100
+ *
+ * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
+ *offset = system_time / 100 - tsc_timestamp * scale / 2^64
+ *
+ * These two equivalencies are implemented in this function.
+ */
+static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info 
*hv_clock,
+   HV_REFERENCE_TSC_PAGE *tsc_ref)
+{
+   u64 max_mul;
+
+   if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
+   return false;
+
+   /*
+* check if scale would overflow, if so we use the time ref counter
+*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
+*tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
+*tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
+*/
+   max_mul = 100ull << (32 - hv_clock->tsc_shift);
+   if (hv_clock->tsc_to_system_mul >= max_mul)
+   return false;
+
+   /*
+* Otherwise compute the scale and offset according to the formulas
+* derived above.
+*/
+   tsc_ref->tsc_scale =
+   mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
+   hv_clock->tsc_to_system_mul,
+   100);
+

[PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-19 Thread Paolo Bonzini
The TSC page was implemented recently but is filled with empty
values. This patch sets up the TSC page scale and offset based
on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.

A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
reads to zero, which potentially improves performance.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Peter Hornyack 
CC: Paolo Bonzini 
CC: Radim Krčmář 
CC: Roman Kagan 
CC: Denis V. Lunev 
[Computation of TSC page parameters rewritten to use the Linux timekeeper
 parameters. - Paolo]
Signed-off-by: Paolo Bonzini 
---
 arch/x86/include/asm/kvm_host.h |   2 +
 arch/x86/kvm/hyperv.c   | 162 
 arch/x86/kvm/hyperv.h   |   3 +
 arch/x86/kvm/x86.c  |   8 +-
 4 files changed, 155 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 32a43a25d415..4b20f7304b9c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -702,6 +702,8 @@ struct kvm_hv {
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
+
+   HV_REFERENCE_TSC_PAGE tsc_ref;
 };
 
 struct kvm_arch {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ed5b77f39ffb..555951625350 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -386,7 +386,21 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic)
 
 static u64 get_time_ref_counter(struct kvm *kvm)
 {
-   return div_u64(get_kvmclock_ns(kvm), 100);
+   struct kvm_hv *hv = &kvm->arch.hyperv;
+   struct kvm_vcpu *vcpu;
+   u64 tsc;
+
+   /*
+* The guest has not set up the TSC page or the clock isn't
+* stable, fall back to get_kvmclock_ns.
+*/
+   if (!hv->tsc_ref.tsc_sequence)
+   return div_u64(get_kvmclock_ns(kvm), 100);
+
+   vcpu = kvm_get_vcpu(kvm, 0);
+   tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+   return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
+   + hv->tsc_ref.tsc_offset;
 }
 
 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
@@ -756,6 +774,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+/*
+ * The kvmclock and Hyper-V TSC page use similar formulas, and converting
+ * between them is possible:
+ *
+ * kvmclock formula:
+ *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *
+ * Hyper-V formula:
+ *nsec/100 = ticks * scale / 2^64 + offset
+ *
+ * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
+ * By dividing the kvmclock formula by 100 and equating what's left we get:
+ *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100
+ *
+ * Now expand the kvmclock formula and divide by 100:
+ *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   + system_time / 100
+ *
+ * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
+ *nsec/100 = ticks * scale / 2^64
+ *   - tsc_timestamp * scale / 2^64
+ *   + system_time / 100
+ *
+ * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
+ *offset = system_time / 100 - tsc_timestamp * scale / 2^64
+ *
+ * These two equivalencies are implemented in this function.
+ */
+static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info 
*hv_clock,
+   HV_REFERENCE_TSC_PAGE *tsc_ref)
+{
+   u64 max_mul;
+
+   if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
+   return false;
+
+   /*
+* check if scale would overflow, if so we use the time ref counter
+*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
+*tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
+*tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
+*/
+   max_mul = 100ull << (32 - hv_clock->tsc_shift);
+   if (hv_clock->tsc_to_system_mul >= max_mul)
+   return false;
+
+   /*
+* Otherwise compute the scale and offset according to the formulas
+* derived above.
+*/
+   tsc_ref->tsc_scale =
+   mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
+   hv_clock->tsc_to_system_mul,
+   100);
+
+   tsc_ref->tsc_offset = hv_clock->system_time;
+   do_div(tsc_ref->tsc_offset, 100);
+   tsc_ref->tsc_offset -=
+   

Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-02 Thread Roman Kagan
On Thu, Sep 01, 2016 at 05:26:15PM +0200, Paolo Bonzini wrote:
> The TSC page was implemented recently but is filled with empty
> values. This patch sets up the TSC page scale and offset based
> on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.
> 
> A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
> reads to zero, which potentially improves performance.
> 
> Signed-off-by: Andrey Smetanin 
> Reviewed-by: Peter Hornyack 
> CC: Paolo Bonzini 
> CC: Radim Krčmář 
> CC: Roman Kagan 
> CC: Denis V. Lunev 
> [Computation of TSC page parameters rewritten to use the Linux timekeeper
>  parameters. - Paolo]
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/hyperv.c | 141 
> --
>  arch/x86/kvm/hyperv.h |   3 ++
>  arch/x86/kvm/x86.c|   8 +--
>  3 files changed, 133 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index ed5b77f39ffb..e089d1f52dc0 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu 
> *vcpu,
>   return 0;
>  }
>  
> +/*
> + * The kvmclock and Hyper-V TSC page use similar formulas.  Because the KVM
> + * one is more precise, it is a little more complex.  However, converting

I'm not sure you're right regarding which one is more precise :)
Hyper-V uses a right shift of 64 which is higher precision than typical
kvmclock shift of around 22.

> + * between them is possible:
> + *
> + * kvmclock formula:
> + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
> + *   + system_time
> + *
> + * Hyper-V formula:
> + *nsec/100 = ticks * scale / 2^64 + offset
> + *
> + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V 
> formula.
> + * By dividing the kvmclock formula by 100 and equating what's left we get:
> + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 
> 100
> + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 
> 100
> + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 
> 100
> + *
> + * Now expand the kvmclock formula and divide by 100:
> + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
> + *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
> + *   + system_time
> + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
> + *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
> + *   + system_time / 100
> + *
> + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
> + *nsec/100 = ticks * scale / 2^64
> + *   - tsc_timestamp * scale / 2^64
> + *   + system_time / 100
> + *
> + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
> + *offset = system_time / 100 - tsc_timestamp * scale / 2^64
> + *
> + * These two equivalencies are implemented in this function.
> + */
> +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info 
> *hv_clock,
> + HV_REFERENCE_TSC_PAGE *tsc_ref)
> +{
> + u64 max_mul;
> +
> + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
> + return false;
> +
> + /*
> +  * check if scale would overflow, if so we use the time ref counter
> +  *tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
> +  *tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
> +  *tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
> +  */
> + max_mul = 100ull << (32 - hv_clock->tsc_shift);
> + if (hv_clock->tsc_to_system_mul >= max_mul)
> + return false;
> +
> + /*
> +  * Otherwise compute the scale and offset according to the formulas
> +  * derived above.
> +  */
> + tsc_ref->tsc_scale =
> + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
> + hv_clock->tsc_to_system_mul,
> + 100);
> +
> + tsc_ref->tsc_offset = hv_clock->system_time;
> + do_div(tsc_ref->tsc_offset, 100);
> + tsc_ref->tsc_offset -=
> + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 
> 64);
> + return true;

Although this is correct, we may want to make it slightly easier to
follow: note that the hv_clock contents are essentially populated using
vcpu->hw_tsc_khz and vcpu->last_guest_tsc, so at this point we can just
directly calculate ->tsc_scale and ->tsc_offset from them.  If we also
stash them somewhere on vcpu we can make the reference counter use
exactly the same procedure as the guest would with tsc page, and
guarantee against precision errors.

Dunno if that really matters much, though.

> +}
> +
> +void kvm_hv_setup_tsc_page(struct kvm *kvm,
> +

Re: [PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-02 Thread Roman Kagan
On Thu, Sep 01, 2016 at 05:26:15PM +0200, Paolo Bonzini wrote:
> The TSC page was implemented recently but is filled with empty
> values. This patch sets up the TSC page scale and offset based
> on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.
> 
> A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
> reads to zero, which potentially improves performance.
> 
> Signed-off-by: Andrey Smetanin 
> Reviewed-by: Peter Hornyack 
> CC: Paolo Bonzini 
> CC: Radim Krčmář 
> CC: Roman Kagan 
> CC: Denis V. Lunev 
> [Computation of TSC page parameters rewritten to use the Linux timekeeper
>  parameters. - Paolo]
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/hyperv.c | 141 
> --
>  arch/x86/kvm/hyperv.h |   3 ++
>  arch/x86/kvm/x86.c|   8 +--
>  3 files changed, 133 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index ed5b77f39ffb..e089d1f52dc0 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu 
> *vcpu,
>   return 0;
>  }
>  
> +/*
> + * The kvmclock and Hyper-V TSC page use similar formulas.  Because the KVM
> + * one is more precise, it is a little more complex.  However, converting

I'm not sure you're right regarding which one is more precise :)
Hyper-V uses a right shift of 64 which is higher precision than typical
kvmclock shift of around 22.

> + * between them is possible:
> + *
> + * kvmclock formula:
> + *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
> + *   + system_time
> + *
> + * Hyper-V formula:
> + *nsec/100 = ticks * scale / 2^64 + offset
> + *
> + * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V 
> formula.
> + * By dividing the kvmclock formula by 100 and equating what's left we get:
> + *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 
> 100
> + *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 
> 100
> + *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 
> 100
> + *
> + * Now expand the kvmclock formula and divide by 100:
> + *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
> + *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
> + *   + system_time
> + *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
> + *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
> + *   + system_time / 100
> + *
> + * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
> + *nsec/100 = ticks * scale / 2^64
> + *   - tsc_timestamp * scale / 2^64
> + *   + system_time / 100
> + *
> + * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
> + *offset = system_time / 100 - tsc_timestamp * scale / 2^64
> + *
> + * These two equivalencies are implemented in this function.
> + */
> +static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info 
> *hv_clock,
> + HV_REFERENCE_TSC_PAGE *tsc_ref)
> +{
> + u64 max_mul;
> +
> + if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
> + return false;
> +
> + /*
> +  * check if scale would overflow, if so we use the time ref counter
> +  *tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
> +  *tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
> +  *tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
> +  */
> + max_mul = 100ull << (32 - hv_clock->tsc_shift);
> + if (hv_clock->tsc_to_system_mul >= max_mul)
> + return false;
> +
> + /*
> +  * Otherwise compute the scale and offset according to the formulas
> +  * derived above.
> +  */
> + tsc_ref->tsc_scale =
> + mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
> + hv_clock->tsc_to_system_mul,
> + 100);
> +
> + tsc_ref->tsc_offset = hv_clock->system_time;
> + do_div(tsc_ref->tsc_offset, 100);
> + tsc_ref->tsc_offset -=
> + mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 
> 64);
> + return true;

Although this is correct, we may want to make it slightly easier to
follow: note that the hv_clock contents are essentially populated using
vcpu->hw_tsc_khz and vcpu->last_guest_tsc, so at this point we can just
directly calculate ->tsc_scale and ->tsc_offset from them.  If we also
stash them somewhere on vcpu we can make the reference counter use
exactly the same procedure as the guest would with tsc page, and
guarantee against precision errors.

Dunno if that really matters much, though.

> +}
> +
> +void kvm_hv_setup_tsc_page(struct kvm *kvm,
> +struct pvclock_vcpu_time_info *hv_clock)
> +{
> + struct kvm_hv *hv = &kvm->arch.hyperv;
> + HV_REFERENCE_TSC_PAGE tsc_ref = { 0 };
> + u32 

[PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-01 Thread Paolo Bonzini
The TSC page was implemented recently but is filled with empty
values. This patch sets up the TSC page scale and offset based
on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.

A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
reads to zero, which potentially improves performance.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Peter Hornyack 
CC: Paolo Bonzini 
CC: Radim Krčmář 
CC: Roman Kagan 
CC: Denis V. Lunev 
[Computation of TSC page parameters rewritten to use the Linux timekeeper
 parameters. - Paolo]
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/hyperv.c | 141 --
 arch/x86/kvm/hyperv.h |   3 ++
 arch/x86/kvm/x86.c|   8 +--
 3 files changed, 133 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ed5b77f39ffb..e089d1f52dc0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+/*
+ * The kvmclock and Hyper-V TSC page use similar formulas.  Because the KVM
+ * one is more precise, it is a little more complex.  However, converting
+ * between them is possible:
+ *
+ * kvmclock formula:
+ *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *
+ * Hyper-V formula:
+ *nsec/100 = ticks * scale / 2^64 + offset
+ *
+ * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
+ * By dividing the kvmclock formula by 100 and equating what's left we get:
+ *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100
+ *
+ * Now expand the kvmclock formula and divide by 100:
+ *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   + system_time / 100
+ *
+ * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
+ *nsec/100 = ticks * scale / 2^64
+ *   - tsc_timestamp * scale / 2^64
+ *   + system_time / 100
+ *
+ * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
+ *offset = system_time / 100 - tsc_timestamp * scale / 2^64
+ *
+ * These two equivalencies are implemented in this function.
+ */
+static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info 
*hv_clock,
+   HV_REFERENCE_TSC_PAGE *tsc_ref)
+{
+   u64 max_mul;
+
+   if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
+   return false;
+
+   /*
+* check if scale would overflow, if so we use the time ref counter
+*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
+*tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
+*tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
+*/
+   max_mul = 100ull << (32 - hv_clock->tsc_shift);
+   if (hv_clock->tsc_to_system_mul >= max_mul)
+   return false;
+
+   /*
+* Otherwise compute the scale and offset according to the formulas
+* derived above.
+*/
+   tsc_ref->tsc_scale =
+   mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
+   hv_clock->tsc_to_system_mul,
+   100);
+
+   tsc_ref->tsc_offset = hv_clock->system_time;
+   do_div(tsc_ref->tsc_offset, 100);
+   tsc_ref->tsc_offset -=
+   mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 
64);
+   return true;
+}
+
+void kvm_hv_setup_tsc_page(struct kvm *kvm,
+  struct pvclock_vcpu_time_info *hv_clock)
+{
+   struct kvm_hv *hv = &kvm->arch.hyperv;
+   HV_REFERENCE_TSC_PAGE tsc_ref = { 0 };
+   u32 tsc_seq;
+   u64 gfn;
+
+   BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(tsc_ref.tsc_sequence));
+   BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
+
+   if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+   return;
+
+   gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+   /*
+* Because the TSC parameters only vary when there is a
+* change in the master clock, do not bother with caching.
+*/
+   if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
+   &tsc_seq, sizeof(tsc_seq))))
+   return;
+
+   /*
+* While we're computing and writing the parameters, force the
+* guest to use the time 

[PATCH 4/4] KVM: x86: Hyper-V tsc page setup

2016-09-01 Thread Paolo Bonzini
The TSC page was implemented recently but is filled with empty
values. This patch sets up the TSC page scale and offset based
on the vcpu TSC, tsc_khz and the HV_X64_MSR_TIME_REF_COUNT value.

A valid TSC page drops the number of HV_X64_MSR_TIME_REF_COUNT MSR
reads to zero, which potentially improves performance.

Signed-off-by: Andrey Smetanin 
Reviewed-by: Peter Hornyack 
CC: Paolo Bonzini 
CC: Radim Krčmář 
CC: Roman Kagan 
CC: Denis V. Lunev 
[Computation of TSC page parameters rewritten to use the Linux timekeeper
 parameters. - Paolo]
Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/hyperv.c | 141 --
 arch/x86/kvm/hyperv.h |   3 ++
 arch/x86/kvm/x86.c|   8 +--
 3 files changed, 133 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ed5b77f39ffb..e089d1f52dc0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -756,6 +756,129 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+/*
+ * The kvmclock and Hyper-V TSC page use similar formulas.  Because the KVM
+ * one is more precise, it is a little more complex.  However, converting
+ * between them is possible:
+ *
+ * kvmclock formula:
+ *nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *
+ * Hyper-V formula:
+ *nsec/100 = ticks * scale / 2^64 + offset
+ *
+ * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
+ * By dividing the kvmclock formula by 100 and equating what's left we get:
+ *ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *scale= tsc_to_system_mul * 2^(32+tsc_shift) / 100
+ *
+ * Now expand the kvmclock formula and divide by 100:
+ *nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
+ *   + system_time
+ *nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
+ *   + system_time / 100
+ *
+ * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
+ *nsec/100 = ticks * scale / 2^64
+ *   - tsc_timestamp * scale / 2^64
+ *   + system_time / 100
+ *
+ * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
+ *offset = system_time / 100 - tsc_timestamp * scale / 2^64
+ *
+ * These two equivalencies are implemented in this function.
+ */
+static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info 
*hv_clock,
+   HV_REFERENCE_TSC_PAGE *tsc_ref)
+{
+   u64 max_mul;
+
+   if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
+   return false;
+
+   /*
+* check if scale would overflow, if so we use the time ref counter
+*tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
+*tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
+*tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
+*/
+   max_mul = 100ull << (32 - hv_clock->tsc_shift);
+   if (hv_clock->tsc_to_system_mul >= max_mul)
+   return false;
+
+   /*
+* Otherwise compute the scale and offset according to the formulas
+* derived above.
+*/
+   tsc_ref->tsc_scale =
+   mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
+   hv_clock->tsc_to_system_mul,
+   100);
+
+   tsc_ref->tsc_offset = hv_clock->system_time;
+   do_div(tsc_ref->tsc_offset, 100);
+   tsc_ref->tsc_offset -=
+   mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 
64);
+   return true;
+}
+
+void kvm_hv_setup_tsc_page(struct kvm *kvm,
+  struct pvclock_vcpu_time_info *hv_clock)
+{
+   struct kvm_hv *hv = &kvm->arch.hyperv;
+   HV_REFERENCE_TSC_PAGE tsc_ref = { 0 };
+   u32 tsc_seq;
+   u64 gfn;
+
+   BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(tsc_ref.tsc_sequence));
+   BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
+
+   if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+   return;
+
+   gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+   /*
+* Because the TSC parameters only vary when there is a
+* change in the master clock, do not bother with caching.
+*/
+   if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
+   &tsc_seq, sizeof(tsc_seq))))
+   return;
+
+   /*
+* While we're computing and writing the parameters, force the
+* guest to use the time reference count MSR.
+*/
+   if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+   &tsc_ref, sizeof(tsc_ref.tsc_sequence)))
+