Gerd Hoffmann wrote:
> Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
>
Looks basically OK, but some comments.
> ---
> arch/x86/Kconfig | 4 +
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/pvclock.c | 148
> +++++++++++++++++++++++++++++++++++++++++++++
> include/asm-x86/pvclock.h | 6 ++
> 4 files changed, 159 insertions(+), 0 deletions(-)
> create mode 100644 arch/x86/kernel/pvclock.c
> create mode 100644 include/asm-x86/pvclock.h
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index fe361ae..deb3049 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -417,6 +417,10 @@ config PARAVIRT
> over full virtualization. However, when run without a hypervisor
> the kernel is theoretically slower and slightly larger.
>
> +config PARAVIRT_CLOCK
> + bool
> + default n
> +
> endif
>
> config MEMTEST_BOOTPARAM
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 5e618c3..77807d4 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
> obj-$(CONFIG_KVM_GUEST) += kvm.o
> obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
> obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
> +obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
>
> obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
>
> diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
> new file mode 100644
> index 0000000..33e526f
> --- /dev/null
> +++ b/arch/x86/kernel/pvclock.c
> @@ -0,0 +1,148 @@
> +/* paravirtual clock -- common code used by kvm/xen
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 2 of the License, or
> + (at your option) any later version.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program; if not, write to the Free Software
> + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> USA
> +*/
> +
> +#include <linux/kernel.h>
> +#include <linux/percpu.h>
> +#include <asm/pvclock.h>
> +
> +/*
> + * These are perodically updated
> + * xen: magic shared_info page
> + * kvm: gpa registered via msr
> + * and then copied here.
> + */
> +struct pvclock_shadow_time {
> + u64 tsc_timestamp; /* TSC at last update of time vals. */
> + u64 system_timestamp; /* Time, in nanosecs, since boot. */
> + u32 tsc_to_nsec_mul;
> + int tsc_shift;
> + u32 version;
> +};
> +
> +/*
> + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
> + * yielding a 64-bit result.
> + */
> +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
> +{
> + u64 product;
> +#ifdef __i386__
> + u32 tmp1, tmp2;
> +#endif
> +
> + if (shift < 0)
> + delta >>= -shift;
> + else
> + delta <<= shift;
> +
> +#ifdef __i386__
> + __asm__ (
> + "mul %5 ; "
> + "mov %4,%%eax ; "
> + "mov %%edx,%4 ; "
> + "mul %5 ; "
> + "xor %5,%5 ; "
> + "add %4,%%eax ; "
> + "adc %5,%%edx ; "
> + : "=A" (product), "=r" (tmp1), "=r" (tmp2)
> + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
> +#elif __x86_64__
> + __asm__ (
> + "mul %%rdx ; shrd $32,%%rdx,%%rax"
> + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
> +#else
> +#error implement me!
> +#endif
> +
> + return product;
> +}
> +
> +static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
> +{
> + u64 delta = native_read_tsc() - shadow->tsc_timestamp;
> + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
> +}
> +
> +/*
> + * Reads a consistent set of time-base values from hypervisor,
> + * into a shadow data area.
> + */
> +static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
> + struct kvm_vcpu_time_info *src)
>
I think the kvm_* types should be renamed. xen_* would make some sense
since the ABI originated with Xen, but something generic would be OK
too. The important thing to get across is that the structure defines a
guest<->host ABI which happens to be shared by Xen and KVM, and it isn't
an in-kernel API like the rest of paravirt_ops.
And having defined a common structure, we may as well use it in the
hypervisor-specific code/headers so there's no strange casting going on.
> +{
> + do {
> + dst->version = src->version;
> + rmb(); /* fetch version before data */
> + dst->tsc_timestamp = src->tsc_timestamp;
> + dst->system_timestamp = src->system_time;
> + dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
> + dst->tsc_shift = src->tsc_shift;
> + rmb(); /* test version after fetching data */
> + } while ((src->version & 1) || (dst->version != src->version));
> +
> + return dst->version;
> +}
> +
> +/*
> + * This is our read_clock function. The host puts an tsc timestamp each time
> + * it updates a new time. Without the tsc adjustment, we can have a situation
> + * in which a vcpu starts to run earlier (smaller system_time), but probes
> + * time later (compared to another vcpu), leading to backwards time
> + */
>
This comment is confusing. Are you explaining why the tsc is read
within the loop? I think it can be clarified.
> +
> +cycle_t pvclock_clocksource_read(struct kvm_vcpu_time_info *src)
> +{
> + struct pvclock_shadow_time shadow;
> + unsigned version;
> + cycle_t ret, offset;
> +
> + do {
> + version = pvclock_get_time_values(&shadow, src);
> + barrier();
> + offset = pvclock_get_nsec_offset(&shadow);
> + ret = shadow.system_timestamp + offset;
> + barrier();
> + } while (version != src->version);
> +
> + return ret;
> +}
> +
> +void pvclock_read_wallclock(struct kvm_wall_clock *wall_clock,
> + struct kvm_vcpu_time_info *vcpu_time,
>
Ditto type names.
> + struct timespec *ts)
> +{
> + u32 version;
> + u64 delta;
> + struct timespec now;
> +
> + /* get wallclock at system boot */
> + do {
> + version = wall_clock->wc_version;
> + rmb(); /* fetch version before time */
> + now.tv_sec = wall_clock->wc_sec;
> + now.tv_nsec = wall_clock->wc_nsec;
> + rmb(); /* fetch time before checking version */
> + } while ((wall_clock->wc_version & 1) || (version !=
> wall_clock->wc_version));
> +
> + delta = pvclock_clocksource_read(vcpu_time); /* time since system
> boot */
> + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
> +
> + now.tv_nsec = do_div(delta, NSEC_PER_SEC);
> + now.tv_sec = delta;
> +
> + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
> +}
> diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
> new file mode 100644
> index 0000000..2b9812f
> --- /dev/null
> +++ b/include/asm-x86/pvclock.h
> @@ -0,0 +1,6 @@
> +#include <linux/clocksource.h>
> +#include <asm/kvm_para.h>
> +cycle_t pvclock_clocksource_read(struct kvm_vcpu_time_info *src);
> +void pvclock_read_wallclock(struct kvm_wall_clock *wall,
> + struct kvm_vcpu_time_info *vcpu,
> + struct timespec *ts);
>
No #ifdef guards?
J
_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/virtualization