Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2014-01-14 Thread Marcelo Tosatti
On Mon, Jan 13, 2014 at 11:11:40PM -0500, Vadim Rozenfeld wrote:
 
 
 - Original Message -
 From: Marcelo Tosatti mtosa...@redhat.com
 To: Vadim Rozenfeld vroze...@redhat.com
 Cc: kvm@vger.kernel.org, p...@dlhnet.de, pbonz...@redhat.com
 Sent: Thursday, December 12, 2013 6:27:00 AM
 Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference 
 time enlightenment
 
 On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
  The following patch allows to activate a partition reference
  time enlightenment that is based on the host platform's support
  for an Invariant Time Stamp Counter (iTSC).
  
  v2 -> v3
  Handle TSC sequence, scale, and offset changing during migration.
  
  ---
   arch/x86/include/asm/kvm_host.h |  1 +
   arch/x86/kvm/x86.c  | 29 +++--
   2 files changed, 28 insertions(+), 2 deletions(-)
  
  diff --git a/arch/x86/include/asm/kvm_host.h 
  b/arch/x86/include/asm/kvm_host.h
  index 2fd0753..81fdff0 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -607,6 +607,7 @@ struct kvm_arch {
  u64 hv_hypercall;
  u64 hv_ref_count;
  u64 hv_tsc_page;
  +   u64 hv_ref_time;
   
  #ifdef CONFIG_KVM_MMU_AUDIT
  int audit_point;
  diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  index 5e4e495a..cb6766a 100644
  --- a/arch/x86/kvm/x86.c
  +++ b/arch/x86/kvm/x86.c
  @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, 
  u32 msr, u64 data)
  break;
  }
  gfn = data  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
  -   addr = gfn_to_hva(kvm, data 
  -   HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
  +   addr = gfn_to_hva(kvm, gfn);
  if (kvm_is_error_hva(addr))
  return 1;
  +   tsc_ref.tsc_sequence =
  +   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
  +   tsc_ref.tsc_scale =
  +   ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
  +   tsc_ref.tsc_offset = 0;
  if (__copy_to_user((void __user *)addr, tsc_ref, 
  sizeof(tsc_ref)))
  return 1;
  mark_page_dirty(kvm, gfn);
  kvm-arch.hv_tsc_page = data;
  +   kvm-arch.hv_ref_count = 0;
  break;
  }
  default:
  @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
  local_irq_enable();
  kvm-arch.kvmclock_offset = delta;
  kvm_gen_update_masterclock(kvm);
  +
  +   if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
  +   HV_REFERENCE_TSC_PAGE* tsc_ref;
  +   u64 curr_time;
  +   tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
  +   kvm-arch.hv_tsc_page  
  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
  +   tsc_ref-tsc_sequence =
  +   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
  tsc_ref-tsc_sequence + 1 : 0;
  +   tsc_ref-tsc_scale = ((1LL  32) / 
  __get_cpu_var(cpu_tsc_khz))  32;
  +   curr_time = (((tsc_ref-tsc_scale  32) * 
  native_read_tsc())  32) + 
  +   tsc_ref-tsc_offset;
  +   tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
  +   }
  break;
  }
  case KVM_GET_CLOCK: {
  @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
  if (copy_to_user(argp, user_ns, sizeof(user_ns)))
  goto out;
  r = 0;
  +   if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
  +   HV_REFERENCE_TSC_PAGE* tsc_ref;
  +   tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
  +   kvm-arch.hv_tsc_page  
  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 
 kvm_read_guest_cached.
 
  +   kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale  32) * 
  +   native_read_tsc())  32) + tsc_ref-tsc_offset;
 
 Why native_read_tsc and not ->read_l1_tsc?
 
 [VR]
 Is it possible to get pointer to the vcpu instance at this point?

See the suggestion to move this code to kvm_guest_time_update.


 Thanks,
 Vadim. 
 
 It is easier to trust the host to check the reliability of the TSC: if
 it uses the TSC clocksource, then the TSCs are stable. So we could condition
 exposing the TSC ref page on ka->use_master_clock=1, see 
 kvm_guest_time_update.
 And hook into pvclock_gtod_notify.
 
 So in addition to X86_FEATURE_CONSTANT_TSC, check
 ka->use_master_clock=1
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2014-01-13 Thread Vadim Rozenfeld


- Original Message -
From: Marcelo Tosatti mtosa...@redhat.com
To: Vadim Rozenfeld vroze...@redhat.com
Cc: kvm@vger.kernel.org, p...@dlhnet.de, pbonz...@redhat.com
Sent: Thursday, December 12, 2013 6:27:00 AM
Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference 
time enlightenment

On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
 The following patch allows to activate a partition reference
 time enlightenment that is based on the host platform's support
 for an Invariant Time Stamp Counter (iTSC).
 
 v2 -> v3
 Handle TSC sequence, scale, and offset changing during migration.
 
 ---
  arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/x86.c  | 29 +++--
  2 files changed, 28 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 2fd0753..81fdff0 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -607,6 +607,7 @@ struct kvm_arch {
   u64 hv_hypercall;
   u64 hv_ref_count;
   u64 hv_tsc_page;
 + u64 hv_ref_time;
  
   #ifdef CONFIG_KVM_MMU_AUDIT
   int audit_point;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 5e4e495a..cb6766a 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, 
 u32 msr, u64 data)
   break;
   }
   gfn = data  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 - addr = gfn_to_hva(kvm, data 
 - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 + addr = gfn_to_hva(kvm, gfn);
   if (kvm_is_error_hva(addr))
   return 1;
 + tsc_ref.tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
 + tsc_ref.tsc_scale =
 + ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
 + tsc_ref.tsc_offset = 0;
   if (__copy_to_user((void __user *)addr, tsc_ref, 
 sizeof(tsc_ref)))
   return 1;
   mark_page_dirty(kvm, gfn);
   kvm-arch.hv_tsc_page = data;
 + kvm-arch.hv_ref_count = 0;
   break;
   }
   default:
 @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
   local_irq_enable();
   kvm-arch.kvmclock_offset = delta;
   kvm_gen_update_masterclock(kvm);
 +
 + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
 + HV_REFERENCE_TSC_PAGE* tsc_ref;
 + u64 curr_time;
 + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
 + kvm-arch.hv_tsc_page  
 HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 + tsc_ref-tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
 tsc_ref-tsc_sequence + 1 : 0;
 + tsc_ref-tsc_scale = ((1LL  32) / 
 __get_cpu_var(cpu_tsc_khz))  32;
 + curr_time = (((tsc_ref-tsc_scale  32) * 
 native_read_tsc())  32) + 
 + tsc_ref-tsc_offset;
 + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
 + }
   break;
   }
   case KVM_GET_CLOCK: {
 @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
   if (copy_to_user(argp, user_ns, sizeof(user_ns)))
   goto out;
   r = 0;
 + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
 + HV_REFERENCE_TSC_PAGE* tsc_ref;
 + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
 + kvm-arch.hv_tsc_page  
 HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

kvm_read_guest_cached.

 + kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale  32) * 
 + native_read_tsc())  32) + tsc_ref-tsc_offset;

Why native_read_tsc and not ->read_l1_tsc?

[VR]
Is it possible to get pointer to the vcpu instance at this point?
Thanks,
Vadim. 

It is easier to trust the host to check the reliability of the TSC: if
it uses the TSC clocksource, then the TSCs are stable. So we could condition
exposing the TSC ref page on ka->use_master_clock=1, see 
kvm_guest_time_update.
And hook into pvclock_gtod_notify.

So in addition to X86_FEATURE_CONSTANT_TSC, check
ka->use_master_clock=1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-12 Thread Paolo Bonzini
Il 11/12/2013 20:27, Marcelo Tosatti ha scritto:
  +  if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
  +  HV_REFERENCE_TSC_PAGE* tsc_ref;
  +  tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
  +  kvm-arch.hv_tsc_page  
  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 kvm_read_guest_cached.
 
  +  kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale  32) * 
  +  native_read_tsc())  32) + tsc_ref-tsc_offset;
 Why native_read_tsc and not ->read_l1_tsc?
 
 It is easier to trust the host to check the reliability of the TSC: if
 it uses the TSC clocksource, then the TSCs are stable. So we could condition
 exposing the TSC ref page on ka->use_master_clock=1, see 
 kvm_guest_time_update.
 And hook into pvclock_gtod_notify.
 
 So in addition to X86_FEATURE_CONSTANT_TSC, check
 ka->use_master_clock=1

FWIW, I agree with all these comments from Marcelo.

Paolo

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-11 Thread Vadim Rozenfeld
On Tue, 2013-12-10 at 17:52 +0100, Paolo Bonzini wrote:
 Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto:
+   if (kvm-arch.hv_tsc_page  
HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+   HV_REFERENCE_TSC_PAGE* tsc_ref;
+   u64 curr_time;
+   tsc_ref = 
(HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
+   kvm-arch.hv_tsc_page  
HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+   tsc_ref-tsc_sequence =
+   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) 
? tsc_ref-tsc_sequence + 1 : 0;
+   tsc_ref-tsc_scale = ((1LL  32) / 
__get_cpu_var(cpu_tsc_khz))  32;
   
   Why shouldn't this be vcpu->arch.virtual_tsc_khz?
  
  Yeah, I was thinking about that, but we need a vcpu instance for this.
 
 You can perhaps store the value from vcpu->arch.virtual_tsc_khz to 
 kvm->arch when the MSR is first written?
 
  Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
  partition creation time and KVM_SET_CLOCK which happens during resume 
  after partition pause? If so - there are several differences, where
  the offset calculation probably is the most important one.
 
 The offset and frequency are the only differences.
 
 + curr_time = (((tsc_ref-tsc_scale  32) * 
 native_read_tsc())  32) + 
 + tsc_ref-tsc_offset;
 + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
 
 Why do you need kvm->arch.hv_ref_time at all?  Can you just use
 get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count?
 Then the same code can set tsc_ref->tsc_offset in both cases.
 
 In fact, it's not clear to me what hv_ref_time is for, and how it
 is different from 

OK, let me explain how it works.
Hyper-V allows the guest to use the invariant TSC provided by the host as a
time stamp source (KeQueryPerformanceCounter). The guest calls rdtsc and
normalizes it to a 10MHz frequency, which is why we need tsc_scale.
tsc_offset is needed for migration or pause/resume cycles.
When we pause a VM, we need to save the current vTSC value
(hv_ref_time), which is rdtsc * tsc_scale + tsc_offset.
Then, during resume, we need to recalculate the new tsc_scale
as well as the new tsc_offset value. 
tsc_offset = old(saved) vTSC - new vTSC

So maybe hv_ref_time is not a good name, but we use it 
for keeping the old vTSC value, saved before stopping VM.

Vadim.

 
 By the way, a small nit:
 
  
  +   tsc_ref.tsc_sequence =
  +   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
  +   tsc_ref.tsc_scale =
  +   ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
  +   tsc_ref.tsc_offset = 0;
  if (__copy_to_user((void __user *)addr, tsc_ref, 
  sizeof(tsc_ref)))
  return 1;
  mark_page_dirty(kvm, gfn);
  kvm-arch.hv_tsc_page = data;
  +   kvm-arch.hv_ref_count = 0;
  break;
 
 This setting of kvm->arch.hv_ref_count belongs in the previous patch.
 
 Paolo


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-11 Thread Paolo Bonzini
Il 11/12/2013 11:58, Vadim Rozenfeld ha scritto:
  +  curr_time = (((tsc_ref-tsc_scale  32) * 
  native_read_tsc())  32) + 
  +  tsc_ref-tsc_offset;
  +  tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
  
  Why do you need kvm->arch.hv_ref_time at all?  Can you just use
  get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count?
  Then the same code can set tsc_ref->tsc_offset in both cases.
  
  In fact, it's not clear to me what hv_ref_time is for, and how it
  is different from 
 OK, let me explain how it works.
 Hyper-V allows guest to use invariant TSC provided by host as a time
 stamp source (KeQueryPerformanceCounter). Guest is calling rdtsc and
 normalizing it to 10MHz frequency, it is why we need tsc_scale.
 tsc_offset is needed for migration or pause/resume cycles.
 When we pause a VM, we need to save the current vTSC value
 (hv_ref_time), which is rdtsc * tsc_scale + tsc_offset.
 Then, during resume, we need to recalculate the new tsc_scale
 as well as the new tsc_offset value. 
 tsc_offset = old(saved) vTSC - new vTSC

In practice save means KVM_GET_CLOCK, and restore means
KVM_SET_CLOCK, right?

 So maybe hv_ref_time is not a good name, but we use it 
 for keeping the old vTSC value, saved before stopping VM.

Ok, this was roughly my understanding as well.

My understanding is also that (((tsc_ref->tsc_scale >> 32) *
native_read_tsc()) >> 32) + tsc_ref->tsc_offset returns exactly the same
value as HV_X64_MSR_TIME_REF_COUNT.  Thus we do not need
kvm->arch.hv_ref_time.  We can use the value of
HV_X64_MSR_TIME_REF_COUNT, which is (get_kernel_ns() +
kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count) / 100, to compute
tsc_offset, like this:

  curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32);
  tsc_ref->tsc_offset = get_hv_x64_msr_time_ref_count() - curr_time;

This code can be applied always: when the TSC page is initialized and
when KVM_SET_CLOCK is called.  You do not need to do anything for
KVM_GET_CLOCK.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-11 Thread Marcelo Tosatti
On Tue, Dec 10, 2013 at 10:23:17PM +1100, Vadim Rozenfeld wrote:
 On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote:
  Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
   + tsc_ref.tsc_sequence =
   + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
   + tsc_ref.tsc_scale =
   + ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
   + tsc_ref.tsc_offset = 0;
 if (__copy_to_user((void __user *)addr, tsc_ref, 
   sizeof(tsc_ref)))
 return 1;
 mark_page_dirty(kvm, gfn);
 kvm-arch.hv_tsc_page = data;
   + kvm-arch.hv_ref_count = 0;
 break;
 }
 default:
   @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 local_irq_enable();
 kvm-arch.kvmclock_offset = delta;
 kvm_gen_update_masterclock(kvm);
   +
   + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
   + HV_REFERENCE_TSC_PAGE* tsc_ref;
   + u64 curr_time;
   + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
   + kvm-arch.hv_tsc_page  
   HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
   + tsc_ref-tsc_sequence =
   + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
   tsc_ref-tsc_sequence + 1 : 0;
   + tsc_ref-tsc_scale = ((1LL  32) / 
   __get_cpu_var(cpu_tsc_khz))  32;
  
  Why shouldn't this be vcpu->arch.virtual_tsc_khz?
 Yeah, I was thinking about that, but we need a vcpu instance for this.

Move it to kvm_guest_time_update time (which is necessary anyway for the
pvclock gtod notifier changes etc).

   + curr_time = (((tsc_ref-tsc_scale  32) * 
   native_read_tsc())  32) + 
   + tsc_ref-tsc_offset;
   + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
   + }
  
  The difference in setting tsc_ref->tsc_scale is the only important
  change between the two occurrences.  If you can avoid that difference
  and you move this to a separate function, you can reuse that new
  function in set_msr_hyperv_pw as well.
 
 Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
 partition creation time and KVM_SET_CLOCK which happens during resume 
 after partition pause? If so - there are several differences, where
 the offset calculation probably is the most important one.
 
 Vadim.
 
  
  Also, kvm_set_tsc_khz should recompute the reference page's values as
  well, so you'd have three uses.
  
  Paolo
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-11 Thread Marcelo Tosatti
On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote:
 The following patch allows to activate a partition reference
 time enlightenment that is based on the host platform's support
 for an Invariant Time Stamp Counter (iTSC).
 
 v2 -> v3
 Handle TSC sequence, scale, and offset changing during migration.
 
 ---
  arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/x86.c  | 29 +++--
  2 files changed, 28 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 2fd0753..81fdff0 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -607,6 +607,7 @@ struct kvm_arch {
   u64 hv_hypercall;
   u64 hv_ref_count;
   u64 hv_tsc_page;
 + u64 hv_ref_time;
  
   #ifdef CONFIG_KVM_MMU_AUDIT
   int audit_point;
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 5e4e495a..cb6766a 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, 
 u32 msr, u64 data)
   break;
   }
   gfn = data  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 - addr = gfn_to_hva(kvm, data 
 - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 + addr = gfn_to_hva(kvm, gfn);
   if (kvm_is_error_hva(addr))
   return 1;
 + tsc_ref.tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
 + tsc_ref.tsc_scale =
 + ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
 + tsc_ref.tsc_offset = 0;
   if (__copy_to_user((void __user *)addr, tsc_ref, 
 sizeof(tsc_ref)))
   return 1;
   mark_page_dirty(kvm, gfn);
   kvm-arch.hv_tsc_page = data;
 + kvm-arch.hv_ref_count = 0;
   break;
   }
   default:
 @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
   local_irq_enable();
   kvm-arch.kvmclock_offset = delta;
   kvm_gen_update_masterclock(kvm);
 +
 + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
 + HV_REFERENCE_TSC_PAGE* tsc_ref;
 + u64 curr_time;
 + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
 + kvm-arch.hv_tsc_page  
 HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 + tsc_ref-tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
 tsc_ref-tsc_sequence + 1 : 0;
 + tsc_ref-tsc_scale = ((1LL  32) / 
 __get_cpu_var(cpu_tsc_khz))  32;
 + curr_time = (((tsc_ref-tsc_scale  32) * 
 native_read_tsc())  32) + 
 + tsc_ref-tsc_offset;
 + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
 + }
   break;
   }
   case KVM_GET_CLOCK: {
 @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
   if (copy_to_user(argp, user_ns, sizeof(user_ns)))
   goto out;
   r = 0;
 + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
 + HV_REFERENCE_TSC_PAGE* tsc_ref;
 + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
 + kvm-arch.hv_tsc_page  
 HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);

kvm_read_guest_cached.

 + kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale  32) * 
 + native_read_tsc())  32) + tsc_ref-tsc_offset;

Why native_read_tsc and not ->read_l1_tsc?

It is easier to trust the host to check the reliability of the TSC: if
it uses the TSC clocksource, then the TSCs are stable. So we could condition
exposing the TSC ref page on ka->use_master_clock=1, see 
kvm_guest_time_update.
And hook into pvclock_gtod_notify.

So in addition to X86_FEATURE_CONSTANT_TSC, check
ka->use_master_clock=1


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-10 Thread Vadim Rozenfeld
On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote:
 Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
  +   tsc_ref.tsc_sequence =
  +   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
  +   tsc_ref.tsc_scale =
  +   ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
  +   tsc_ref.tsc_offset = 0;
  if (__copy_to_user((void __user *)addr, tsc_ref, 
  sizeof(tsc_ref)))
  return 1;
  mark_page_dirty(kvm, gfn);
  kvm-arch.hv_tsc_page = data;
  +   kvm-arch.hv_ref_count = 0;
  break;
  }
  default:
  @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
  local_irq_enable();
  kvm-arch.kvmclock_offset = delta;
  kvm_gen_update_masterclock(kvm);
  +
  +   if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
  +   HV_REFERENCE_TSC_PAGE* tsc_ref;
  +   u64 curr_time;
  +   tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
  +   kvm-arch.hv_tsc_page  
  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
  +   tsc_ref-tsc_sequence =
  +   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
  tsc_ref-tsc_sequence + 1 : 0;
  +   tsc_ref-tsc_scale = ((1LL  32) / 
  __get_cpu_var(cpu_tsc_khz))  32;
 
 Why shouldn't this be vcpu->arch.virtual_tsc_khz?
Yeah, I was thinking about that, but we need a vcpu instance for this.

 
  +   curr_time = (((tsc_ref-tsc_scale  32) * 
  native_read_tsc())  32) + 
  +   tsc_ref-tsc_offset;
  +   tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
  +   }
 
 The difference in setting tsc_ref->tsc_scale is the only important
 change between the two occurrences.  If you can avoid that difference
 and you move this to a separate function, you can reuse that new
 function in set_msr_hyperv_pw as well.

Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
partition creation time and KVM_SET_CLOCK which happens during resume 
after partition pause? If so - there are several differences, where
the offset calculation probably is the most important one.

Vadim.

 
 Also, kvm_set_tsc_khz should recompute the reference page's values as
 well, so you'd have three uses.
 
 Paolo


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-10 Thread Paolo Bonzini
Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto:
   + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
   + HV_REFERENCE_TSC_PAGE* tsc_ref;
   + u64 curr_time;
   + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
   + kvm-arch.hv_tsc_page  
   HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
   + tsc_ref-tsc_sequence =
   + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
   tsc_ref-tsc_sequence + 1 : 0;
   + tsc_ref-tsc_scale = ((1LL  32) / 
   __get_cpu_var(cpu_tsc_khz))  32;
  
  Why shouldn't this be vcpu->arch.virtual_tsc_khz?
 
 Yeah, I was thinking about that, but we need a vcpu instance for this.

You can perhaps store the value from vcpu->arch.virtual_tsc_khz to 
kvm->arch when the MSR is first written?

 Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during
 partition creation time and KVM_SET_CLOCK which happens during resume 
 after partition pause? If so - there are several differences, where
 the offset calculation probably is the most important one.

The offset and frequency are the only differences.

+   curr_time = (((tsc_ref-tsc_scale  32) * 
native_read_tsc())  32) + 
+   tsc_ref-tsc_offset;
+   tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;

Why do you need kvm->arch.hv_ref_time at all?  Can you just use
get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count?
Then the same code can set tsc_ref->tsc_offset in both cases.

In fact, it's not clear to me what hv_ref_time is for, and how it
is different from 

By the way, a small nit:

 
 + tsc_ref.tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
 + tsc_ref.tsc_scale =
 + ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
 + tsc_ref.tsc_offset = 0;
   if (__copy_to_user((void __user *)addr, tsc_ref, 
 sizeof(tsc_ref)))
   return 1;
   mark_page_dirty(kvm, gfn);
   kvm-arch.hv_tsc_page = data;
 + kvm-arch.hv_ref_count = 0;
   break;

This setting of kvm->arch.hv_ref_count belongs in the previous patch.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-09 Thread Paolo Bonzini
Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto:
 + tsc_ref.tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
 + tsc_ref.tsc_scale =
 + ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
 + tsc_ref.tsc_offset = 0;
   if (__copy_to_user((void __user *)addr, tsc_ref, 
 sizeof(tsc_ref)))
   return 1;
   mark_page_dirty(kvm, gfn);
   kvm-arch.hv_tsc_page = data;
 + kvm-arch.hv_ref_count = 0;
   break;
   }
   default:
 @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
   local_irq_enable();
   kvm-arch.kvmclock_offset = delta;
   kvm_gen_update_masterclock(kvm);
 +
 + if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
 + HV_REFERENCE_TSC_PAGE* tsc_ref;
 + u64 curr_time;
 + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
 + kvm-arch.hv_tsc_page  
 HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
 + tsc_ref-tsc_sequence =
 + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
 tsc_ref-tsc_sequence + 1 : 0;
 + tsc_ref-tsc_scale = ((1LL  32) / 
 __get_cpu_var(cpu_tsc_khz))  32;

Why shouldn't this be vcpu->arch.virtual_tsc_khz?

 + curr_time = (((tsc_ref-tsc_scale  32) * 
 native_read_tsc())  32) + 
 + tsc_ref-tsc_offset;
 + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
 + }

The difference in setting tsc_ref->tsc_scale is the only important
change between the two occurrences.  If you can avoid that difference
and you move this to a separate function, you can reuse that new
function in set_msr_hyperv_pw as well.

Also, kvm_set_tsc_khz should recompute the reference page's values as
well, so you'd have three uses.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment

2013-12-08 Thread Vadim Rozenfeld
The following patch allows to activate a partition reference
time enlightenment that is based on the host platform's support
for an Invariant Time Stamp Counter (iTSC).

v2 -> v3
Handle TSC sequence, scale, and offset changing during migration.

---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c  | 29 +++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2fd0753..81fdff0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -607,6 +607,7 @@ struct kvm_arch {
u64 hv_hypercall;
u64 hv_ref_count;
u64 hv_tsc_page;
+   u64 hv_ref_time;
 
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e4e495a..cb6766a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 
msr, u64 data)
break;
}
gfn = data  HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
-   addr = gfn_to_hva(kvm, data 
-   HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+   addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return 1;
+   tsc_ref.tsc_sequence =
+   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0;
+   tsc_ref.tsc_scale =
+   ((1LL  32) / vcpu-arch.virtual_tsc_khz)  32;
+   tsc_ref.tsc_offset = 0;
if (__copy_to_user((void __user *)addr, tsc_ref, 
sizeof(tsc_ref)))
return 1;
mark_page_dirty(kvm, gfn);
kvm-arch.hv_tsc_page = data;
+   kvm-arch.hv_ref_count = 0;
break;
}
default:
@@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
local_irq_enable();
kvm-arch.kvmclock_offset = delta;
kvm_gen_update_masterclock(kvm);
+
+   if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+   HV_REFERENCE_TSC_PAGE* tsc_ref;
+   u64 curr_time;
+   tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, 
+   kvm-arch.hv_tsc_page  
HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+   tsc_ref-tsc_sequence =
+   boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
tsc_ref-tsc_sequence + 1 : 0;
+   tsc_ref-tsc_scale = ((1LL  32) / 
__get_cpu_var(cpu_tsc_khz))  32;
+   curr_time = (((tsc_ref-tsc_scale  32) * 
native_read_tsc())  32) + 
+   tsc_ref-tsc_offset;
+   tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time;
+   }
break;
}
case KVM_GET_CLOCK: {
@@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_to_user(argp, user_ns, sizeof(user_ns)))
goto out;
r = 0;
+   if (kvm-arch.hv_tsc_page  HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+   HV_REFERENCE_TSC_PAGE* tsc_ref;
+   tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm,
+   kvm-arch.hv_tsc_page  
HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT);
+   kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale  32) * 
+   native_read_tsc())  32) + tsc_ref-tsc_offset;
+   }
break;
}
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html