On Mon, Nov 19, 2018 at 01:12:46PM +0100, Reyk Floeter wrote:
> the attached diff is another attempt at implementing a pvclock(4)
> guest driver.  This improves the clock on KVM and replaces the need
> for using the VM-expensive acpihpet(4).
> 

So far I only got positive reports.  Where are the problems? ;)

Otherwise: OK?

Reyk

> Index: share/man/man4/pvclock.4
> ===================================================================
> RCS file: share/man/man4/pvclock.4
> diff -N share/man/man4/pvclock.4
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ share/man/man4/pvclock.4  19 Nov 2018 11:48:33 -0000
> @@ -0,0 +1,45 @@
> +.\"  $OpenBSD$
> +.\"
> +.\" Copyright (c) 2018 Reyk Floeter <r...@openbsd.org>
> +.\"
> +.\" Permission to use, copy, modify, and distribute this software for any
> +.\" purpose with or without fee is hereby granted, provided that the above
> +.\" copyright notice and this permission notice appear in all copies.
> +.\"
> +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +.\"
> +.Dd $Mdocdate$
> +.Dt PVCLOCK 4
> +.Os
> +.Sh NAME
> +.Nm pvclock
> +.Nd paravirtual clock driver
> +.Sh SYNOPSIS
> +.Cd "pvclock* at pvbus?"
> +.Sh DESCRIPTION
> +The
> +.Nm
> +driver supports the paravirtual clock that is available in KVM and
> +other hypervisors.
> +.Nm
> +uses a shared page between the guest and the hypervisor to synchronize
> +the TSC clock in an efficient way.
> +.Sh SEE ALSO
> +.Xr pvbus 4
> +.Sh HISTORY
> +The
> +.Nm
> +driver first appeared in
> +.Ox 6.5 .
> +.Sh AUTHORS
> +.An -nosplit
> +The
> +.Nm
> +driver was written by
> +.An Reyk Floeter Aq Mt r...@openbsd.org .
> Index: sys/arch/amd64/conf/GENERIC
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v
> retrieving revision 1.464
> diff -u -p -u -p -r1.464 GENERIC
> --- sys/arch/amd64/conf/GENERIC       26 Oct 2018 20:26:19 -0000      1.464
> +++ sys/arch/amd64/conf/GENERIC       19 Nov 2018 11:48:33 -0000
> @@ -79,6 +79,8 @@ ipmi0       at mainbus? disable     # IPMI
>  
>  vmt0 at pvbus?               # VMware Tools
>  
> +pvclock0 at pvbus?           # KVM pvclock
> +
>  xen0 at pvbus?               # Xen HVM domU
>  xnf* at xen?                 # Xen Netfront
>  xbf* at xen?                 # Xen Blkfront
> Index: sys/dev/pv/files.pv
> ===================================================================
> RCS file: /cvs/src/sys/dev/pv/files.pv,v
> retrieving revision 1.14
> diff -u -p -u -p -r1.14 files.pv
> --- sys/dev/pv/files.pv       24 Aug 2018 16:07:01 -0000      1.14
> +++ sys/dev/pv/files.pv       19 Nov 2018 11:48:33 -0000
> @@ -8,6 +8,11 @@ device       pvbus
>  attach       pvbus at mainbus
>  file dev/pv/pvbus.c                  pvbus   needs-flag
>  
> +# KVM clock
> +device       pvclock
> +attach       pvclock at pvbus
> +file dev/pv/pvclock.c                pvclock needs-flag
> +
>  # VMware Tools
>  device       vmt
>  attach       vmt at pvbus
> Index: sys/dev/pv/pvclock.c
> ===================================================================
> RCS file: sys/dev/pv/pvclock.c
> diff -N sys/dev/pv/pvclock.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/dev/pv/pvclock.c      19 Nov 2018 11:48:33 -0000
> @@ -0,0 +1,229 @@
> +/*   $OpenBSD$       */
> +
> +/*
> + * Copyright (c) 2018 Reyk Floeter <r...@openbsd.org>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#if !defined(__i386__) && !defined(__amd64__)
> +#error pvclock(4) is only supported on i386 and amd64
> +#endif
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +#include <sys/kernel.h>
> +#include <sys/timetc.h>
> +#include <sys/timeout.h>
> +#include <sys/malloc.h>
> +#include <sys/atomic.h>
> +
> +#include <machine/cpu.h>
> +#include <uvm/uvm_extern.h>
> +
> +#include <dev/pv/pvvar.h>
> +#include <dev/pv/pvreg.h>
> +
> +struct pvclock_softc {
> +     struct device            sc_dev;
> +     void                    *sc_time;
> +     paddr_t                  sc_paddr;
> +     struct timecounter      *sc_tc;
> +};
> +
> +struct pvclock_wall_clock {
> +     uint32_t                 wc_version;
> +     uint32_t                 wc_sec;
> +     uint32_t                 wc_nsec;
> +} __packed;
> +
> +struct pvclock_time_info {
> +     uint32_t                 ti_version;
> +     uint32_t                 ti_pad0;
> +     uint64_t                 ti_tsc_timestamp;
> +     uint64_t                 ti_system_time;
> +     uint32_t                 ti_tsc_to_system_mul;
> +     int8_t                   ti_tsc_shift;
> +     uint8_t                  ti_flags;
> +     uint8_t                  ti_pad[2];
> +} __packed;
> +
> +#define PVCLOCK_FLAG_TSC_STABLE              0x01
> +#define PVCLOCK_SYSTEM_TIME_ENABLE   0x01
> +#define DEVNAME(_s)                  ((_s)->sc_dev.dv_xname)
> +
> +int   pvclock_match(struct device *, void *, void *);
> +void  pvclock_attach(struct device *, struct device *, void *);
> +int   pvclock_activate(struct device *, int);
> +
> +uint  pvclock_get_timecount(struct timecounter *);
> +void  pvclock_read_time_info(struct pvclock_softc *,
> +         struct pvclock_time_info *);
> +
> +struct cfattach pvclock_ca = {
> +     sizeof(struct pvclock_softc),
> +     pvclock_match,
> +     pvclock_attach,
> +     NULL,
> +     pvclock_activate
> +};
> +
> +struct cfdriver pvclock_cd = {
> +     NULL,
> +     "pvclock",
> +     DV_DULL
> +};
> +
> +struct timecounter pvclock_timecounter = {
> +     pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
> +};
> +
> +/* Match only if pvbus found KVM offering the stable version 2 clock. */
> +int
> +pvclock_match(struct device *parent, void *match, void *aux)
> +{
> +     struct pv_attach_args   *pva = aux;
> +     struct pvbus_hv         *hv;
> +
> +     /*
> +      * pvclock is provided by different hypervisors, we currently only
> +      * support the "kvmclock"; attach writes its MSR unconditionally.
> +      */
> +     hv = &pva->pva_hv[PVBUS_KVM];
> +     if (hv->hv_base == 0)
> +             return (0);
> +
> +     /*
> +      * We only implement support for the 2nd version of pvclock.
> +      * The first version is basically the same but with different
> +      * non-standard MSRs and it is deprecated.
> +      */
> +     if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
> +             return (0);
> +
> +     /*
> +      * Only the "stable" clock with a sync'ed TSC is supported: the
> +      * host guarantees that the TSC is constant and invariant, either
> +      * by the underlying TSC or by passing on a synchronized value.
> +      */
> +     if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
> +             return (0);
> +
> +     return (1);
> +}
> +
> +void
> +pvclock_attach(struct device *parent, struct device *self, void *aux)
> +{
> +     struct pvclock_softc    *sc = (struct pvclock_softc *)self;
> +     paddr_t                  pa;
> +
> +     if ((sc->sc_time = km_alloc(PAGE_SIZE,
> +         &kv_any, &kp_zero, &kd_nowait)) == NULL) {
> +             printf(": time page allocation failed\n");
> +             return;
> +     }
> +     if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
> +             printf(": time page PA extraction failed\n");
> +             km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
> +             return;
> +     }
> +
> +     wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
> +     sc->sc_paddr = pa;
> +
> +     sc->sc_tc = &pvclock_timecounter;
> +     sc->sc_tc->tc_name = DEVNAME(sc);
> +     sc->sc_tc->tc_frequency = 1000000000ULL;
> +     sc->sc_tc->tc_priv = sc;
> +
> +     /* Better than HPET but below TSC */
> +     sc->sc_tc->tc_quality = 1500;
> +
> +     tc_init(sc->sc_tc);
> +
> +     printf("\n");
> +}
> +
> +int
> +pvclock_activate(struct device *self, int act)
> +{
> +     struct pvclock_softc    *sc = (struct pvclock_softc *)self;
> +     int                      rv = 0;
> +     paddr_t                  pa = sc->sc_paddr;
> +
> +     switch (act) {
> +     case DVACT_POWERDOWN:
> +             wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
> +             break;
> +     case DVACT_RESUME:
> +             wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
> +             break;
> +     }
> +
> +     return (rv);
> +}
> +
> +static inline uint32_t
> +pvclock_read_begin(const struct pvclock_time_info *ti)
> +{
> +     uint32_t version = ti->ti_version & ~0x1;
> +     virtio_membar_sync();
> +     return (version);
> +}
> +
> +static inline int
> +pvclock_read_done(const struct pvclock_time_info *ti,
> +    uint32_t version)
> +{
> +     virtio_membar_sync();
> +     return (ti->ti_version == version);
> +}
> +
> +/* Timecounter read hook: guest time in nanoseconds, truncated to uint. */
> +uint
> +pvclock_get_timecount(struct timecounter *tc)
> +{
> +     struct pvclock_softc            *sc = tc->tc_priv;
> +     struct pvclock_time_info        *ti = sc->sc_time;
> +     uint64_t                         tsc_timestamp, system_time, delta, ctr;
> +     uint32_t                         version, mul_frac;
> +     int8_t                           shift;
> +     uint8_t                          flags;
> +
> +     do {
> +             version = pvclock_read_begin(ti);
> +             system_time = ti->ti_system_time;
> +             tsc_timestamp = ti->ti_tsc_timestamp;
> +             mul_frac = ti->ti_tsc_to_system_mul;
> +             shift = ti->ti_tsc_shift;
> +             flags = ti->ti_flags;
> +     } while (!pvclock_read_done(ti, version));
> +
> +     /* This bit must be set as we attached based on the stable flag */
> +     if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0)
> +             panic("%s: unstable result on stable clock", DEVNAME(sc));
> +
> +     /*
> +      * See linux/Documentation/virtual/kvm/msr.txt for the algorithm.
> +      * Multiply delta in 32-bit halves; the full product can exceed 64 bits.
> +      */
> +     delta = rdtsc() - tsc_timestamp;
> +     if (shift < 0)
> +             delta >>= -shift;
> +     else
> +             delta <<= shift;
> +     ctr = (delta >> 32) * mul_frac + system_time;
> +     ctr += ((delta & 0xffffffffULL) * mul_frac) >> 32;
> +     return (ctr);
> +}
> Index: sys/dev/pv/pvreg.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pv/pvreg.h,v
> retrieving revision 1.4
> diff -u -p -u -p -r1.4 pvreg.h
> --- sys/dev/pv/pvreg.h        12 Dec 2015 12:33:49 -0000      1.4
> +++ sys/dev/pv/pvreg.h        19 Nov 2018 11:48:33 -0000
> @@ -43,6 +43,9 @@
>  #define      KVM_MSR_EOI_EN                          0x4b564d04
>  #define KVM_PV_EOI_BIT                               0
>  
> +#define KVM_MSR_WALL_CLOCK                   0x4b564d00
> +#define KVM_MSR_SYSTEM_TIME                  0x4b564d01
> +
>  /*
>   * Hyper-V
>   */

Reply via email to