Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-19 Thread Roman Kagan
On Mon, Sep 19, 2016 at 01:39:10PM +0200, Paolo Bonzini wrote:
> We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
> contents of the Hyper-V TSC page.  Get the values from the Linux
> timekeeper even if kvmclock is not enabled.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/x86.c | 109 
> +
>  1 file changed, 59 insertions(+), 50 deletions(-)

Reviewed-by: Roman Kagan 


Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-19 Thread Roman Kagan
On Mon, Sep 19, 2016 at 01:39:10PM +0200, Paolo Bonzini wrote:
> We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
> contents of the Hyper-V TSC page.  Get the values from the Linux
> timekeeper even if kvmclock is not enabled.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/x86.c | 109 
> +
>  1 file changed, 59 insertions(+), 50 deletions(-)

Reviewed-by: Roman Kagan 


[PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-19 Thread Paolo Bonzini
We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
contents of the Hyper-V TSC page.  Get the values from the Linux
timekeeper even if kvmclock is not enabled.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/x86.c | 109 +
 1 file changed, 59 insertions(+), 50 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b4ae92599d34..d1e830715e40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1722,6 +1722,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+{
+   struct kvm_vcpu_arch *vcpu = &v->arch;
+   struct pvclock_vcpu_time_info guest_hv_clock;
+
+   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+   &guest_hv_clock, sizeof(guest_hv_clock))))
+   return;
+
+   /* This VCPU is paused, but it's legal for a guest to read another
+* VCPU's kvmclock, so we really have to follow the specification where
+* it says that version is odd if data is being modified, and even after
+* it is consistent.
+*
+* Version field updates must be kept separate.  This is because
+* kvm_write_guest_cached might use a "rep movs" instruction, and
+* writes within a string instruction are weakly ordered.  So there
+* are three writes overall.
+*
+* As a small optimization, only write the version field in the first
+* and third write.  The vcpu->pv_time cache is still valid, because the
+* version field is the first in the struct.
+*/
+   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+   vcpu->hv_clock.version = guest_hv_clock.version + 1;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+
+   smp_wmb();
+
+   /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+   vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+
+   if (vcpu->pvclock_set_guest_stopped_request) {
+   vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+   vcpu->pvclock_set_guest_stopped_request = false;
+   }
+
+   trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock));
+
+   smp_wmb();
+
+   vcpu->hv_clock.version++;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
unsigned long flags, tgt_tsc_khz;
@@ -1729,7 +1783,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
-   struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
 
@@ -1783,8 +1836,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
local_irq_restore(flags);
 
-   if (!vcpu->pv_time_enabled)
-   return 0;
+   /* With all the info we got, fill in the values */
 
if (kvm_has_tsc_control)
tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
@@ -1796,64 +1848,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = tgt_tsc_khz;
}
 
-   /* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
 
-   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-   &guest_hv_clock, sizeof(guest_hv_clock))))
-   return 0;
-
-   /* This VCPU is paused, but it's legal for a guest to read another
-* VCPU's kvmclock, so we really have to follow the specification where
-* it says that version is odd if data is being modified, and even after
-* it is consistent.
-*
-* Version field updates must be kept separate.  This is because
-* kvm_write_guest_cached might use a "rep movs" instruction, and
-* writes within a string instruction are weakly ordered.  So there
-* are three writes overall.
-*
-* As a small optimization, only write the version field in the first
-* and third write.  The vcpu->pv_time cache is still valid, because the
-* version field is the first in the struct.
-*/
-   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-
-   vcpu->hv_clock.version = guest_hv_clock.version + 1;
-   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-   

[PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-19 Thread Paolo Bonzini
We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
contents of the Hyper-V TSC page.  Get the values from the Linux
timekeeper even if kvmclock is not enabled.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/x86.c | 109 +
 1 file changed, 59 insertions(+), 50 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b4ae92599d34..d1e830715e40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1722,6 +1722,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+{
+   struct kvm_vcpu_arch *vcpu = &v->arch;
+   struct pvclock_vcpu_time_info guest_hv_clock;
+
+   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+   &guest_hv_clock, sizeof(guest_hv_clock))))
+   return;
+
+   /* This VCPU is paused, but it's legal for a guest to read another
+* VCPU's kvmclock, so we really have to follow the specification where
+* it says that version is odd if data is being modified, and even after
+* it is consistent.
+*
+* Version field updates must be kept separate.  This is because
+* kvm_write_guest_cached might use a "rep movs" instruction, and
+* writes within a string instruction are weakly ordered.  So there
+* are three writes overall.
+*
+* As a small optimization, only write the version field in the first
+* and third write.  The vcpu->pv_time cache is still valid, because the
+* version field is the first in the struct.
+*/
+   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+   vcpu->hv_clock.version = guest_hv_clock.version + 1;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+
+   smp_wmb();
+
+   /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+   vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+
+   if (vcpu->pvclock_set_guest_stopped_request) {
+   vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+   vcpu->pvclock_set_guest_stopped_request = false;
+   }
+
+   trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock));
+
+   smp_wmb();
+
+   vcpu->hv_clock.version++;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
unsigned long flags, tgt_tsc_khz;
@@ -1729,7 +1783,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
-   struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
 
@@ -1783,8 +1836,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
local_irq_restore(flags);
 
-   if (!vcpu->pv_time_enabled)
-   return 0;
+   /* With all the info we got, fill in the values */
 
if (kvm_has_tsc_control)
tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
@@ -1796,64 +1848,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = tgt_tsc_khz;
}
 
-   /* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
 
-   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-   &guest_hv_clock, sizeof(guest_hv_clock))))
-   return 0;
-
-   /* This VCPU is paused, but it's legal for a guest to read another
-* VCPU's kvmclock, so we really have to follow the specification where
-* it says that version is odd if data is being modified, and even after
-* it is consistent.
-*
-* Version field updates must be kept separate.  This is because
-* kvm_write_guest_cached might use a "rep movs" instruction, and
-* writes within a string instruction are weakly ordered.  So there
-* are three writes overall.
-*
-* As a small optimization, only write the version field in the first
-* and third write.  The vcpu->pv_time cache is still valid, because the
-* version field is the first in the struct.
-*/
-   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-
-   vcpu->hv_clock.version = guest_hv_clock.version + 1;
-   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-   &vcpu->hv_clock,
-

Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-02 Thread Roman Kagan
On Thu, Sep 01, 2016 at 05:26:12PM +0200, Paolo Bonzini wrote:
> We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
> contents of the Hyper-V TSC page.  Get the values from the Linux
> timekeeper even if kvmclock is not enabled.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/x86.c | 109 
> +
>  1 file changed, 59 insertions(+), 50 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 19f9f9e05c2a..65974dd0565f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1716,6 +1716,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
>  #endif
>  }
>  
> +static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
> +{
> + struct kvm_vcpu_arch *vcpu = &v->arch;
> + struct pvclock_vcpu_time_info guest_hv_clock;
> +
> + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
> + &guest_hv_clock, sizeof(guest_hv_clock))))
> + return;
> +
> + /* This VCPU is paused, but it's legal for a guest to read another
> +  * VCPU's kvmclock, so we really have to follow the specification where
> +  * it says that version is odd if data is being modified, and even after
> +  * it is consistent.
> +  *
> +  * Version field updates must be kept separate.  This is because
> +  * kvm_write_guest_cached might use a "rep movs" instruction, and
> +  * writes within a string instruction are weakly ordered.  So there
> +  * are three writes overall.
> +  *
> +  * As a small optimization, only write the version field in the first
> +  * and third write.  The vcpu->pv_time cache is still valid, because the
> +  * version field is the first in the struct.
> +  */
> + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
> +
> + vcpu->hv_clock.version = guest_hv_clock.version + 1;
> + kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> + &vcpu->hv_clock,
> + sizeof(vcpu->hv_clock.version));
> +
> + smp_wmb();
> +
> + /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
> + vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
> +
> + if (vcpu->pvclock_set_guest_stopped_request) {
> + vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> + vcpu->pvclock_set_guest_stopped_request = false;
> + }
> +
> + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
> +
> + kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> + &vcpu->hv_clock,
> + sizeof(vcpu->hv_clock));
> +
> + smp_wmb();
> +
> + vcpu->hv_clock.version++;
> + kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> + &vcpu->hv_clock,
> + sizeof(vcpu->hv_clock.version));
> +}
> +
>  static int kvm_guest_time_update(struct kvm_vcpu *v)
>  {
>   unsigned long flags, tgt_tsc_khz;
> @@ -1723,7 +1777,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>   struct kvm_arch *ka = &v->kvm->arch;
>   s64 kernel_ns;
>   u64 tsc_timestamp, host_tsc;
> - struct pvclock_vcpu_time_info guest_hv_clock;
>   u8 pvclock_flags;
>   bool use_master_clock;
>  
> @@ -1777,8 +1830,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>  
>   local_irq_restore(flags);
>  
> - if (!vcpu->pv_time_enabled)
> - return 0;

Strictly speaking, you only need .hv_clock updated if either kvmclock or
tsc_ref_page is enabled, so you may want to still skip the calculations
otherwise.

> + /* With all the info we got, fill in the values */
>  
>   if (kvm_has_tsc_control)
>   tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
> @@ -1790,64 +1842,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>   vcpu->hw_tsc_khz = tgt_tsc_khz;
>   }
>  
> - /* With all the info we got, fill in the values */
>   vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
>   vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
>   vcpu->last_guest_tsc = tsc_timestamp;
>  
> - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
> - &guest_hv_clock, sizeof(guest_hv_clock))))
> - return 0;
> -
> - /* This VCPU is paused, but it's legal for a guest to read another
> -  * VCPU's kvmclock, so we really have to follow the specification where
> -  * it says that version is odd if data is being modified, and even after
> -  * it is consistent.
> -  *
> -  * Version field updates must be kept separate.  This is because
> -  * kvm_write_guest_cached might use a "rep movs" instruction, and
> -  * writes within a string instruction are weakly ordered.  So there
> -  * are three writes overall.
> -  *
> -  * As a small optimization, only write the version field in the first
> -  * and third write.  The vcpu->pv_time cache is 

Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-02 Thread Roman Kagan
On Thu, Sep 01, 2016 at 05:26:12PM +0200, Paolo Bonzini wrote:
> We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
> contents of the Hyper-V TSC page.  Get the values from the Linux
> timekeeper even if kvmclock is not enabled.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  arch/x86/kvm/x86.c | 109 
> +
>  1 file changed, 59 insertions(+), 50 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 19f9f9e05c2a..65974dd0565f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1716,6 +1716,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
>  #endif
>  }
>  
> +static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
> +{
> + struct kvm_vcpu_arch *vcpu = &v->arch;
> + struct pvclock_vcpu_time_info guest_hv_clock;
> +
> + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
> + &guest_hv_clock, sizeof(guest_hv_clock))))
> + return;
> +
> + /* This VCPU is paused, but it's legal for a guest to read another
> +  * VCPU's kvmclock, so we really have to follow the specification where
> +  * it says that version is odd if data is being modified, and even after
> +  * it is consistent.
> +  *
> +  * Version field updates must be kept separate.  This is because
> +  * kvm_write_guest_cached might use a "rep movs" instruction, and
> +  * writes within a string instruction are weakly ordered.  So there
> +  * are three writes overall.
> +  *
> +  * As a small optimization, only write the version field in the first
> +  * and third write.  The vcpu->pv_time cache is still valid, because the
> +  * version field is the first in the struct.
> +  */
> + BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
> +
> + vcpu->hv_clock.version = guest_hv_clock.version + 1;
> + kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> + &vcpu->hv_clock,
> + sizeof(vcpu->hv_clock.version));
> +
> + smp_wmb();
> +
> + /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
> + vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
> +
> + if (vcpu->pvclock_set_guest_stopped_request) {
> + vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> + vcpu->pvclock_set_guest_stopped_request = false;
> + }
> +
> + trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
> +
> + kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> + &vcpu->hv_clock,
> + sizeof(vcpu->hv_clock));
> +
> + smp_wmb();
> +
> + vcpu->hv_clock.version++;
> + kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
> + &vcpu->hv_clock,
> + sizeof(vcpu->hv_clock.version));
> +}
> +
>  static int kvm_guest_time_update(struct kvm_vcpu *v)
>  {
>   unsigned long flags, tgt_tsc_khz;
> @@ -1723,7 +1777,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>   struct kvm_arch *ka = &v->kvm->arch;
>   s64 kernel_ns;
>   u64 tsc_timestamp, host_tsc;
> - struct pvclock_vcpu_time_info guest_hv_clock;
>   u8 pvclock_flags;
>   bool use_master_clock;
>  
> @@ -1777,8 +1830,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>  
>   local_irq_restore(flags);
>  
> - if (!vcpu->pv_time_enabled)
> - return 0;

Strictly speaking, you only need .hv_clock updated if either kvmclock or
tsc_ref_page is enabled, so you may want to still skip the calculations
otherwise.

> + /* With all the info we got, fill in the values */
>  
>   if (kvm_has_tsc_control)
>   tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
> @@ -1790,64 +1842,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>   vcpu->hw_tsc_khz = tgt_tsc_khz;
>   }
>  
> - /* With all the info we got, fill in the values */
>   vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
>   vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
>   vcpu->last_guest_tsc = tsc_timestamp;
>  
> - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
> - &guest_hv_clock, sizeof(guest_hv_clock))))
> - return 0;
> -
> - /* This VCPU is paused, but it's legal for a guest to read another
> -  * VCPU's kvmclock, so we really have to follow the specification where
> -  * it says that version is odd if data is being modified, and even after
> -  * it is consistent.
> -  *
> -  * Version field updates must be kept separate.  This is because
> -  * kvm_write_guest_cached might use a "rep movs" instruction, and
> -  * writes within a string instruction are weakly ordered.  So there
> -  * are three writes overall.
> -  *
> -  * As a small optimization, only write the version field in the first
> -  * and third write.  The vcpu->pv_time cache is still valid, because the

Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-02 Thread Paolo Bonzini


On 02/09/2016 15:31, Roman Kagan wrote:
> >  
> > -   if (!vcpu->pv_time_enabled)
> > -   return 0;
> 
> Strictly speaking, you only need .hv_clock updated if either kvmclock or
> tsc_ref_page is enabled, so you may want to still skip the calculations
> otherwise.

Yeah, but that's really a rare case so I don't think it's worth it...

Paolo


Re: [PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-02 Thread Paolo Bonzini


On 02/09/2016 15:31, Roman Kagan wrote:
> >  
> > -   if (!vcpu->pv_time_enabled)
> > -   return 0;
> 
> Strictly speaking, you only need .hv_clock updated if either kvmclock or
> tsc_ref_page is enabled, so you may want to still skip the calculations
> otherwise.

Yeah, but that's really a rare case so I don't think it's worth it...

Paolo


[PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-01 Thread Paolo Bonzini
We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
contents of the Hyper-V TSC page.  Get the values from the Linux
timekeeper even if kvmclock is not enabled.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/x86.c | 109 +
 1 file changed, 59 insertions(+), 50 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19f9f9e05c2a..65974dd0565f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1716,6 +1716,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+{
+   struct kvm_vcpu_arch *vcpu = &v->arch;
+   struct pvclock_vcpu_time_info guest_hv_clock;
+
+   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+   &guest_hv_clock, sizeof(guest_hv_clock))))
+   return;
+
+   /* This VCPU is paused, but it's legal for a guest to read another
+* VCPU's kvmclock, so we really have to follow the specification where
+* it says that version is odd if data is being modified, and even after
+* it is consistent.
+*
+* Version field updates must be kept separate.  This is because
+* kvm_write_guest_cached might use a "rep movs" instruction, and
+* writes within a string instruction are weakly ordered.  So there
+* are three writes overall.
+*
+* As a small optimization, only write the version field in the first
+* and third write.  The vcpu->pv_time cache is still valid, because the
+* version field is the first in the struct.
+*/
+   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+   vcpu->hv_clock.version = guest_hv_clock.version + 1;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+
+   smp_wmb();
+
+   /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+   vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+
+   if (vcpu->pvclock_set_guest_stopped_request) {
+   vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+   vcpu->pvclock_set_guest_stopped_request = false;
+   }
+
+   trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock));
+
+   smp_wmb();
+
+   vcpu->hv_clock.version++;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
unsigned long flags, tgt_tsc_khz;
@@ -1723,7 +1777,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
-   struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
 
@@ -1777,8 +1830,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
local_irq_restore(flags);
 
-   if (!vcpu->pv_time_enabled)
-   return 0;
+   /* With all the info we got, fill in the values */
 
if (kvm_has_tsc_control)
tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
@@ -1790,64 +1842,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = tgt_tsc_khz;
}
 
-   /* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
 
-   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-   &guest_hv_clock, sizeof(guest_hv_clock))))
-   return 0;
-
-   /* This VCPU is paused, but it's legal for a guest to read another
-* VCPU's kvmclock, so we really have to follow the specification where
-* it says that version is odd if data is being modified, and even after
-* it is consistent.
-*
-* Version field updates must be kept separate.  This is because
-* kvm_write_guest_cached might use a "rep movs" instruction, and
-* writes within a string instruction are weakly ordered.  So there
-* are three writes overall.
-*
-* As a small optimization, only write the version field in the first
-* and third write.  The vcpu->pv_time cache is still valid, because the
-* version field is the first in the struct.
-*/
-   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-
-   vcpu->hv_clock.version = guest_hv_clock.version + 1;
-   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-   

[PATCH 1/4] KVM: x86: always fill in vcpu->arch.hv_clock

2016-09-01 Thread Paolo Bonzini
We will use it in the next patches for KVM_GET_CLOCK and as a basis for the
contents of the Hyper-V TSC page.  Get the values from the Linux
timekeeper even if kvmclock is not enabled.

Signed-off-by: Paolo Bonzini 
---
 arch/x86/kvm/x86.c | 109 +
 1 file changed, 59 insertions(+), 50 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19f9f9e05c2a..65974dd0565f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1716,6 +1716,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+{
+   struct kvm_vcpu_arch *vcpu = &v->arch;
+   struct pvclock_vcpu_time_info guest_hv_clock;
+
+   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+   &guest_hv_clock, sizeof(guest_hv_clock))))
+   return;
+
+   /* This VCPU is paused, but it's legal for a guest to read another
+* VCPU's kvmclock, so we really have to follow the specification where
+* it says that version is odd if data is being modified, and even after
+* it is consistent.
+*
+* Version field updates must be kept separate.  This is because
+* kvm_write_guest_cached might use a "rep movs" instruction, and
+* writes within a string instruction are weakly ordered.  So there
+* are three writes overall.
+*
+* As a small optimization, only write the version field in the first
+* and third write.  The vcpu->pv_time cache is still valid, because the
+* version field is the first in the struct.
+*/
+   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+   vcpu->hv_clock.version = guest_hv_clock.version + 1;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+
+   smp_wmb();
+
+   /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+   vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+
+   if (vcpu->pvclock_set_guest_stopped_request) {
+   vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+   vcpu->pvclock_set_guest_stopped_request = false;
+   }
+
+   trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock));
+
+   smp_wmb();
+
+   vcpu->hv_clock.version++;
+   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+   &vcpu->hv_clock,
+   sizeof(vcpu->hv_clock.version));
+}
+
 static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
unsigned long flags, tgt_tsc_khz;
@@ -1723,7 +1777,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns;
u64 tsc_timestamp, host_tsc;
-   struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
 
@@ -1777,8 +1830,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
local_irq_restore(flags);
 
-   if (!vcpu->pv_time_enabled)
-   return 0;
+   /* With all the info we got, fill in the values */
 
if (kvm_has_tsc_control)
tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
@@ -1790,64 +1842,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = tgt_tsc_khz;
}
 
-   /* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_guest_tsc = tsc_timestamp;
 
-   if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-   &guest_hv_clock, sizeof(guest_hv_clock))))
-   return 0;
-
-   /* This VCPU is paused, but it's legal for a guest to read another
-* VCPU's kvmclock, so we really have to follow the specification where
-* it says that version is odd if data is being modified, and even after
-* it is consistent.
-*
-* Version field updates must be kept separate.  This is because
-* kvm_write_guest_cached might use a "rep movs" instruction, and
-* writes within a string instruction are weakly ordered.  So there
-* are three writes overall.
-*
-* As a small optimization, only write the version field in the first
-* and third write.  The vcpu->pv_time cache is still valid, because the
-* version field is the first in the struct.
-*/
-   BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-
-   vcpu->hv_clock.version = guest_hv_clock.version + 1;
-   kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-   &vcpu->hv_clock,
-