Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
On Mon, Jan 13, 2014 at 11:11:40PM -0500, Vadim Rozenfeld wrote: - Original Message - From: Marcelo Tosatti mtosa...@redhat.com To: Vadim Rozenfeld vroze...@redhat.com Cc: kvm@vger.kernel.org, p...@dlhnet.de, pbonz...@redhat.com Sent: Thursday, December 12, 2013 6:27:00 AM Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote: The following patch allows to activate a partition reference time enlightenment that is based on the host platform's support for an Invariant Time Stamp Counter (iTSC). v2 - v3 Handle TSC sequence, scale, and offest changing during migration. --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 29 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2fd0753..81fdff0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,6 +607,7 @@ struct kvm_arch { u64 hv_hypercall; u64 hv_ref_count; u64 hv_tsc_page; + u64 hv_ref_time; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e4e495a..cb6766a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; } gfn = data HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - addr = gfn_to_hva(kvm, data - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return 1; + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
1 : 0; + tsc_ref.tsc_scale = + ((1LL 32) / vcpu-arch.virtual_tsc_khz) 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm-arch.hv_tsc_page = data; + kvm-arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm-arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref-tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref-tsc_sequence + 1 : 0; + tsc_ref-tsc_scale = ((1LL 32) / __get_cpu_var(cpu_tsc_khz)) 32; + curr_time = (((tsc_ref-tsc_scale 32) * native_read_tsc()) 32) + + tsc_ref-tsc_offset; + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time; + } break; } case KVM_GET_CLOCK: { @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_to_user(argp, user_ns, sizeof(user_ns))) goto out; r = 0; + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); kvm_read_guest_cached. + kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale 32) * + native_read_tsc()) 32) + tsc_ref-tsc_offset; Why native_read_tsc and not -read_l1_tsc? [VR] Is it possible to get pointer to the vcpu instance at this point? See the suggestion to move this code to kvm_guest_time_update. Thanks, Vadim. It is easier to trust on the host to check reliability of the TSC: if it uses TSC clocksource, then the TSCs are stable. So could condition exposing the TSC ref page when ka-use_master_clock=1, see kvm_guest_time_update. And hook into pvclock_gtod_notify. 
So in addition to X86_FEATURE_CONSTANT_TSC, check ka-use_master_clock=1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
- Original Message - From: Marcelo Tosatti mtosa...@redhat.com To: Vadim Rozenfeld vroze...@redhat.com Cc: kvm@vger.kernel.org, p...@dlhnet.de, pbonz...@redhat.com Sent: Thursday, December 12, 2013 6:27:00 AM Subject: Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote: The following patch allows to activate a partition reference time enlightenment that is based on the host platform's support for an Invariant Time Stamp Counter (iTSC). v2 - v3 Handle TSC sequence, scale, and offest changing during migration. --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 29 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2fd0753..81fdff0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,6 +607,7 @@ struct kvm_arch { u64 hv_hypercall; u64 hv_ref_count; u64 hv_tsc_page; + u64 hv_ref_time; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e4e495a..cb6766a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; } gfn = data HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - addr = gfn_to_hva(kvm, data - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return 1; + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
1 : 0; + tsc_ref.tsc_scale = + ((1LL 32) / vcpu-arch.virtual_tsc_khz) 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm-arch.hv_tsc_page = data; + kvm-arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm-arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref-tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref-tsc_sequence + 1 : 0; + tsc_ref-tsc_scale = ((1LL 32) / __get_cpu_var(cpu_tsc_khz)) 32; + curr_time = (((tsc_ref-tsc_scale 32) * native_read_tsc()) 32) + + tsc_ref-tsc_offset; + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time; + } break; } case KVM_GET_CLOCK: { @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_to_user(argp, user_ns, sizeof(user_ns))) goto out; r = 0; + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); kvm_read_guest_cached. + kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale 32) * + native_read_tsc()) 32) + tsc_ref-tsc_offset; Why native_read_tsc and not -read_l1_tsc? [VR] Is it possible to get pointer to the vcpu instance at this point? Thanks, Vadim. It is easier to trust on the host to check reliability of the TSC: if it uses TSC clocksource, then the TSCs are stable. So could condition exposing the TSC ref page when ka-use_master_clock=1, see kvm_guest_time_update. And hook into pvclock_gtod_notify. 
So in addition to X86_FEATURE_CONSTANT_TSC, check ka-use_master_clock=1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
Il 11/12/2013 20:27, Marcelo Tosatti ha scritto: + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); kvm_read_guest_cached. + kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * + native_read_tsc()) >> 32) + tsc_ref->tsc_offset; Why native_read_tsc and not ->read_l1_tsc? It is easier to trust on the host to check reliability of the TSC: if it uses TSC clocksource, then the TSCs are stable. So could condition exposing the TSC ref page when ka->use_master_clock=1, see kvm_guest_time_update. And hook into pvclock_gtod_notify. So in addition to X86_FEATURE_CONSTANT_TSC, check ka->use_master_clock=1 FWIW, I agree with all these comments from Marcelo. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
On Tue, 2013-12-10 at 17:52 +0100, Paolo Bonzini wrote: Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto: + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref-tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref-tsc_sequence + 1 : 0; + tsc_ref-tsc_scale = ((1LL 32) / __get_cpu_var(cpu_tsc_khz)) 32; Why shouldn't this be vcpu-arch.virtual_tsc_khz? Yeah, I was thinking about that, but we need a vcpu instance for this. You can perhaps store the value from vcpu-arch.virtual_tsc_khz to kvm-arch when the MSR is first written? Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during partition creation time and KVM_SET_CLOCK which happens during resume after partition pause? If so - there are several differences, where the offset calculation probably is the most important one. The offset and frequence are the only differences. + curr_time = (((tsc_ref-tsc_scale 32) * native_read_tsc()) 32) + + tsc_ref-tsc_offset; + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time; Why do you need kvm-arch.hv_ref_time at all? Can you just use get_kernel_ns() + kvm-arch.kvmclock_offset - kvm-arch.hv_ref_count? Then the same code can set tsc_ref-tsc_offset in both cases. In fact, it's not clear to me what hv_ref_time is for, and how it is different from OK, let me explain how it works. Hyper-V allows guest to use invariant TSC provided by host as a time stamp source (KeQueryPerformanceCounter). Guest is calling rdtsc and normalizing it to 10MHz frequency, it is why we need tsc_scale. tsc_offset is needed for migration or pause/resume cycles. When we pause a VM, we need to save the current vTSC value (hv_ref_time), which is rdtsc * tsc_scale + tsc_offset. Then, during resume, we need to recalculate the new tsc_scale as well as the new tsc_offset value. 
tsc_offset = old(saved) vTSC - new vTSC So maybe hv_ref_time is not a good name, but we use it for keeping the old vTSC value, saved before stopping VM. Vadim. By the way, a small nit: + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL << 32) / vcpu->arch.virtual_tsc_khz) << 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm->arch.hv_tsc_page = data; + kvm->arch.hv_ref_count = 0; break; This setting of kvm->arch.hv_ref_count belongs in the previous patch. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
Il 11/12/2013 11:58, Vadim Rozenfeld ha scritto: + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + + tsc_ref->tsc_offset; + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time; Why do you need kvm->arch.hv_ref_time at all? Can you just use get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count? Then the same code can set tsc_ref->tsc_offset in both cases. In fact, it's not clear to me what hv_ref_time is for, and how it is different from OK, let me explain how it works. Hyper-V allows guest to use invariant TSC provided by host as a time stamp source (KeQueryPerformanceCounter). Guest is calling rdtsc and normalizing it to 10MHz frequency, it is why we need tsc_scale. tsc_offset is needed for migration or pause/resume cycles. When we pause a VM, we need to save the current vTSC value (hv_ref_time), which is rdtsc * tsc_scale + tsc_offset. Then, during resume, we need to recalculate the new tsc_scale as well as the new tsc_offset value. tsc_offset = old(saved) vTSC - new vTSC In practice save means KVM_GET_CLOCK, and restore means KVM_SET_CLOCK, right? So maybe hv_ref_time is not a good name, but we use it for keeping the old vTSC value, saved before stopping VM. Ok, this was roughly my understanding as well. My understanding is also that (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + tsc_ref->tsc_offset returns exactly the same value as HV_X64_MSR_TIME_REF_COUNT. Thus we do not need kvm->arch.hv_ref_time. We can use the value of HV_X64_MSR_TIME_REF_COUNT, which is (get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count) / 100, to compute tsc_offset, like this: curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32); tsc_ref->tsc_offset = get_hv_x64_msr_time_ref_count() - curr_time; This code can be applied always: when the TSC page is initialized and when KVM_SET_CLOCK is called. You do not need to do anything for KVM_GET_CLOCK. 
Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
On Tue, Dec 10, 2013 at 10:23:17PM +1100, Vadim Rozenfeld wrote: On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote: Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto: + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL 32) / vcpu-arch.virtual_tsc_khz) 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm-arch.hv_tsc_page = data; + kvm-arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm-arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref-tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref-tsc_sequence + 1 : 0; + tsc_ref-tsc_scale = ((1LL 32) / __get_cpu_var(cpu_tsc_khz)) 32; Why shouldn't this be vcpu-arch.virtual_tsc_khz? Yeah, I was thinking about that, but we need a vcpu instance for this. Move it to kvm_guest_time_update time (which is necessary anyway for the pvclock gtod notifier changes etc). + curr_time = (((tsc_ref-tsc_scale 32) * native_read_tsc()) 32) + + tsc_ref-tsc_offset; + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time; + } The difference in setting tsc_ref-tsc_scale is the only important change between the two occurrences. If you can avoid that difference and you move this to a separate function, you can reuse that new function in set_msr_hyperv_pw as well. Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during partition creation time and KVM_SET_CLOCK which happens during resume after partition pause? If so - there are several differences, where the offset calculation probably is the most important one. Vadim. 
Also, kvm_set_tsc_khz should recompute the reference page's values as well, so you'd have three uses. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
On Sun, Dec 08, 2013 at 10:33:39PM +1100, Vadim Rozenfeld wrote: The following patch allows to activate a partition reference time enlightenment that is based on the host platform's support for an Invariant Time Stamp Counter (iTSC). v2 - v3 Handle TSC sequence, scale, and offest changing during migration. --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 29 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2fd0753..81fdff0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,6 +607,7 @@ struct kvm_arch { u64 hv_hypercall; u64 hv_ref_count; u64 hv_tsc_page; + u64 hv_ref_time; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e4e495a..cb6766a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; } gfn = data HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - addr = gfn_to_hva(kvm, data - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return 1; + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL 32) / vcpu-arch.virtual_tsc_khz) 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm-arch.hv_tsc_page = data; + kvm-arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm-arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref-tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
tsc_ref-tsc_sequence + 1 : 0; + tsc_ref-tsc_scale = ((1LL 32) / __get_cpu_var(cpu_tsc_khz)) 32; + curr_time = (((tsc_ref-tsc_scale 32) * native_read_tsc()) 32) + + tsc_ref-tsc_offset; + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time; + } break; } case KVM_GET_CLOCK: { @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_to_user(argp, user_ns, sizeof(user_ns))) goto out; r = 0; + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); kvm_read_guest_cached. + kvm-arch.hv_ref_time = (((tsc_ref-tsc_scale 32) * + native_read_tsc()) 32) + tsc_ref-tsc_offset; Why native_read_tsc and not -read_l1_tsc? It is easier to trust on the host to check reliability of the TSC: if it uses TSC clocksource, then the TSCs are stable. So could condition exposing the TSC ref page when ka-use_master_clock=1, see kvm_guest_time_update. And hook into pvclock_gtod_notify. So in addition to X86_FEATURE_CONSTANT_TSC, check ka-use_master_clock=1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
On Mon, 2013-12-09 at 15:32 +0100, Paolo Bonzini wrote: Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto: + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL 32) / vcpu-arch.virtual_tsc_khz) 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm-arch.hv_tsc_page = data; + kvm-arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm-arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm-arch.hv_tsc_page HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref-tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref-tsc_sequence + 1 : 0; + tsc_ref-tsc_scale = ((1LL 32) / __get_cpu_var(cpu_tsc_khz)) 32; Why shouldn't this be vcpu-arch.virtual_tsc_khz? Yeah, I was thinking about that, but we need a vcpu instance for this. + curr_time = (((tsc_ref-tsc_scale 32) * native_read_tsc()) 32) + + tsc_ref-tsc_offset; + tsc_ref-tsc_offset = kvm-arch.hv_ref_time - curr_time; + } The difference in setting tsc_ref-tsc_scale is the only important change between the two occurrences. If you can avoid that difference and you move this to a separate function, you can reuse that new function in set_msr_hyperv_pw as well. Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during partition creation time and KVM_SET_CLOCK which happens during resume after partition pause? If so - there are several differences, where the offset calculation probably is the most important one. Vadim. Also, kvm_set_tsc_khz should recompute the reference page's values as well, so you'd have three uses. 
Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
Il 10/12/2013 12:23, Vadim Rozenfeld ha scritto: + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref->tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0; + tsc_ref->tsc_scale = ((1LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32; Why shouldn't this be vcpu->arch.virtual_tsc_khz? Yeah, I was thinking about that, but we need a vcpu instance for this. You can perhaps store the value from vcpu->arch.virtual_tsc_khz to kvm->arch when the MSR is first written? Do you mean between HV_X64_MSR_REFERENCE_TSC which happens during partition creation time and KVM_SET_CLOCK which happens during resume after partition pause? If so - there are several differences, where the offset calculation probably is the most important one. The offset and frequency are the only differences. + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + + tsc_ref->tsc_offset; + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time; Why do you need kvm->arch.hv_ref_time at all? Can you just use get_kernel_ns() + kvm->arch.kvmclock_offset - kvm->arch.hv_ref_count? Then the same code can set tsc_ref->tsc_offset in both cases. In fact, it's not clear to me what hv_ref_time is for, and how it is different from By the way, a small nit: + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL << 32) / vcpu->arch.virtual_tsc_khz) << 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm->arch.hv_tsc_page = data; + kvm->arch.hv_ref_count = 0; break; This setting of kvm->arch.hv_ref_count belongs in the previous patch. 
Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
Il 08/12/2013 12:33, Vadim Rozenfeld ha scritto: + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL << 32) / vcpu->arch.virtual_tsc_khz) << 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm->arch.hv_tsc_page = data; + kvm->arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm->arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref->tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? tsc_ref->tsc_sequence + 1 : 0; + tsc_ref->tsc_scale = ((1LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32; Why shouldn't this be vcpu->arch.virtual_tsc_khz? + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + + tsc_ref->tsc_offset; + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time; + } The difference in setting tsc_ref->tsc_scale is the only important change between the two occurrences. If you can avoid that difference and you move this to a separate function, you can reuse that new function in set_msr_hyperv_pw as well. Also, kvm_set_tsc_khz should recompute the reference page's values as well, so you'd have three uses. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH v3 2/2] add support for Hyper-V partition reference time enlightenment
The following patch allows activating a partition reference time enlightenment that is based on the host platform's support for an Invariant Time Stamp Counter (iTSC). v2 -> v3 Handle TSC sequence, scale, and offset changing during migration. --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 29 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2fd0753..81fdff0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,6 +607,7 @@ struct kvm_arch { u64 hv_hypercall; u64 hv_ref_count; u64 hv_tsc_page; + u64 hv_ref_time; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e4e495a..cb6766a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1882,14 +1882,19 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; } gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - addr = gfn_to_hva(kvm, data >> - HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return 1; + tsc_ref.tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 1 : 0; + tsc_ref.tsc_scale = + ((1LL << 32) / vcpu->arch.virtual_tsc_khz) << 32; + tsc_ref.tsc_offset = 0; if (__copy_to_user((void __user *)addr, &tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); kvm->arch.hv_tsc_page = data; + kvm->arch.hv_ref_count = 0; break; } default: @@ -3879,6 +3884,19 @@ long kvm_arch_vm_ioctl(struct file *filp, local_irq_enable(); kvm->arch.kvmclock_offset = delta; kvm_gen_update_masterclock(kvm); + + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + u64 curr_time; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + tsc_ref->tsc_sequence = + boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ? 
tsc_ref->tsc_sequence + 1 : 0; + tsc_ref->tsc_scale = ((1LL << 32) / __get_cpu_var(cpu_tsc_khz)) << 32; + curr_time = (((tsc_ref->tsc_scale >> 32) * native_read_tsc()) >> 32) + + tsc_ref->tsc_offset; + tsc_ref->tsc_offset = kvm->arch.hv_ref_time - curr_time; + } break; } case KVM_GET_CLOCK: { @@ -3896,6 +3914,13 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_to_user(argp, &user_ns, sizeof(user_ns))) goto out; r = 0; + if (kvm->arch.hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + HV_REFERENCE_TSC_PAGE* tsc_ref; + tsc_ref = (HV_REFERENCE_TSC_PAGE*)gfn_to_hva(kvm, + kvm->arch.hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT); + kvm->arch.hv_ref_time = (((tsc_ref->tsc_scale >> 32) * + native_read_tsc()) >> 32) + tsc_ref->tsc_offset; + } break; } -- 1.8.1.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html