> Date: Thu, 28 May 2020 17:44:31 +0300
> From: Paul Irofti <p...@irofti.net>
> 
> Hi,
> 
> Here is a new iteration of the diff which includes support for MD high
> resolution clocks. Currently only implements TSC on amd64. If the
> MD function is not defined, it falls back to the syscall.
> 
> There is the question of the skew fix, but that will be addressed in a
> separate kernel diff that will not affect the current diff at all.
> 
> I could not find a way to find on which processor the process is running
> on from userland without going through a syscall. If there is one please
> let me know. It would make things easier.
> 
> In the meantime I have also gotten positive feedback from various
> testers that run this on their main machine.
> 
> Anyway, I think we can decide on the struct name and the auxiliary
> vector ID and consider this done.
> 
> Thoughts?

This is getting us somewhere.

Still some issues though (besides the skew thing you already mention).

1. The synchronization mechanism is broken.  The seq member needs to
   be set to 0 while updating the struct and only set to the "next"
   value after completing the update of the full struct.  You need to
   be careful to avoid 0, otherwise the application will spin for a
   full timeslice while seq overflows into 0.

   However, since you now export the timehands generation, I'd really
   drop seq and use the timehands generation for synchronization.  It
   makes no sense to have both.

2. Since tc_update_timekeep() is called from tc_windup() it doesn't
   need to do the synchronization dance.

3. Like tc_windup(), tc_update_timekeep() needs to have some
    membar_producer() calls in it instead of membar_consumer() calls.

4. There is no need to update tc_counter_mask on every update.

5. What if the TSC is not available as a usable timecounter?  In that
   case libc should fall back on the system call.  But we need a way
   to communicate what the timecounter is and detect when we switch
   timecounters.  Maybe adding a timecounter ID to the page will help
   here.  But then MD code in libc will have to check the ID and
   dispatch to the right timecounter read function.

6. The major and minor fields probably should be uint32_t or maybe
    uint16_t.  You're not saving any space by making them uint8_t.

> 
> Paul 
> 
> diff --git lib/libc/arch/amd64/gen/Makefile.inc 
> lib/libc/arch/amd64/gen/Makefile.inc
> index e995309ed71..caa4452a3d9 100644
> --- lib/libc/arch/amd64/gen/Makefile.inc
> +++ lib/libc/arch/amd64/gen/Makefile.inc
> @@ -2,6 +2,6 @@
>  
>  SRCS+=       _setjmp.S fabs.S infinity.c ldexp.c modf.S nan.c setjmp.S \
>       sigsetjmp.S
> -SRCS+=       fpclassifyl.c isfinitel.c isinfl.c isnanl.c isnormall.c 
> signbitl.c
> +SRCS+=       fpclassifyl.c rdtsc.c isfinitel.c isinfl.c isnanl.c isnormall.c 
> signbitl.c
>  SRCS+=       flt_rounds.S fpgetmask.S fpgetround.S fpgetsticky.S fpsetmask.S 
> \
>       fpsetround.S fpsetsticky.S
> diff --git lib/libc/arch/amd64/gen/rdtsc.c lib/libc/arch/amd64/gen/rdtsc.c
> new file mode 100644
> index 00000000000..b14c862c61a
> --- /dev/null
> +++ lib/libc/arch/amd64/gen/rdtsc.c
> @@ -0,0 +1,26 @@
> +/*   $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <p...@irofti.net>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +
> +uint64_t
> +tc_get_timecount_md(void)
> +{
> +     uint32_t hi, lo;
> +     asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
> +     return ((uint64_t)lo)|(((uint64_t)hi)<<32);
> +}
> diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
> index cd056c85719..2b25d49f32a 100644
> --- lib/libc/asr/asr.c
> +++ lib/libc/asr/asr.c
> @@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
> timeout)
>       struct timespec pollstart, pollend, elapsed;
>       int r;
>  
> -     if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
> +     if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
>               return -1;
>  
>       while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
> -             if (clock_gettime(CLOCK_MONOTONIC, &pollend))
> +             if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
>                       return -1;
>               timespecsub(&pollend, &pollstart, &elapsed);
>               timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
> @@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
>               asr->a_rtime = 0;
>       }
>  
> -     if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> +     if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>               return;
>  
>       if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
> diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
> index 82de8fa33b7..02fd3013cc1 100644
> --- lib/libc/crypt/bcrypt.c
> +++ lib/libc/crypt/bcrypt.c
> @@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
>       char buf[_PASSWORD_LEN];
>       int duration;
>  
> -     clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
> +     WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
>       bcrypt_newhash("testpassword", r, buf, sizeof(buf));
> -     clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
> +     WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
>  
>       duration = after.tv_sec - before.tv_sec;
>       duration *= 1000000;
> diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
> index 270f54aada5..c5921851203 100644
> --- lib/libc/dlfcn/init.c
> +++ lib/libc/dlfcn/init.c
> @@ -30,6 +30,7 @@
>  #include <link.h>
>  #include <stdlib.h>          /* atexit */
>  #include <string.h>
> +#include <time.h>            /* timekeep */
>  #include <unistd.h>
>  
>  #include "init.h"
> @@ -45,8 +46,9 @@
>  /* XXX should be in an include file shared with csu */
>  char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
>  
> -/* provide definition for this */
> +/* provide definition for these */
>  int  _pagesize = 0;
> +void *_timekeep = NULL;
>  
>  /*
>   * In dynamicly linked binaries environ and __progname are overriden by
> @@ -68,6 +70,12 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
>  
>  /* provide definitions for these */
>  const dl_cb *_dl_cb __relro = NULL;
> +#if defined(__amd64)
> +uint64_t (*const tc_get_timecount)(void) = tc_get_timecount_md;
> +#else
> +uint64_t (*const tc_get_timecount)(void) = NULL;
> +#endif
> +
>  
>  void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
>  void
> @@ -105,6 +113,10 @@ _libc_preinit(int argc, char **argv, char **envp, 
> dl_cb_cb *cb)
>                       phnum = aux->au_v;
>                       break;
>  #endif /* !PIC */
> +             case AUX_openbsd_timekeep:
> +                     if (tc_get_timecount)
> +                             _timekeep = (void *)aux->au_v;
> +                     break;
>               }
>       }
>  
> diff --git lib/libc/gen/auth_subr.c lib/libc/gen/auth_subr.c
> index 1286a96fe40..32f86eda50f 100644
> --- lib/libc/gen/auth_subr.c
> +++ lib/libc/gen/auth_subr.c
> @@ -752,7 +752,7 @@ auth_check_expire(auth_session_t *as)
>  
>       if (as->pwd && (quad_t)as->pwd->pw_expire != 0) {
>               if (as->now.tv_sec == 0)
> -                     gettimeofday(&as->now, NULL);
> +                     WRAP(gettimeofday)(&as->now, NULL);
>               if ((quad_t)as->now.tv_sec >= (quad_t)as->pwd->pw_expire) {
>                       as->state &= ~AUTH_ALLOW;
>                       as->state |= AUTH_EXPIRED;
> @@ -779,7 +779,7 @@ auth_check_change(auth_session_t *as)
>  
>       if (as->pwd && (quad_t)as->pwd->pw_change) {
>               if (as->now.tv_sec == 0)
> -                     gettimeofday(&as->now, NULL);
> +                     WRAP(gettimeofday)(&as->now, NULL);
>               if (as->now.tv_sec >= (quad_t)as->pwd->pw_change) {
>                       as->state &= ~AUTH_ALLOW;
>                       as->state |= AUTH_PWEXPIRED;
> diff --git lib/libc/gen/time.c lib/libc/gen/time.c
> index 3bbd0d733d1..b3ce9a800f1 100644
> --- lib/libc/gen/time.c
> +++ lib/libc/gen/time.c
> @@ -36,7 +36,7 @@ time(time_t *t)
>  {
>       struct timeval tt;
>  
> -     if (gettimeofday(&tt, NULL) == -1)
> +     if (WRAP(gettimeofday)(&tt, NULL) == -1)
>               return (-1);
>       if (t)
>               *t = (time_t)tt.tv_sec;
> diff --git lib/libc/gen/times.c lib/libc/gen/times.c
> index 02e4dd44b5c..36841810d1b 100644
> --- lib/libc/gen/times.c
> +++ lib/libc/gen/times.c
> @@ -52,7 +52,7 @@ times(struct tms *tp)
>               return ((clock_t)-1);
>       tp->tms_cutime = CONVTCK(ru.ru_utime);
>       tp->tms_cstime = CONVTCK(ru.ru_stime);
> -     if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> +     if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>               return ((clock_t)-1);
>       return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
>  }
> diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
> index 520a5954025..845cbe80356 100644
> --- lib/libc/gen/timespec_get.c
> +++ lib/libc/gen/timespec_get.c
> @@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
>  {
>       switch (base) {
>       case TIME_UTC:
> -             if (clock_gettime(CLOCK_REALTIME, ts) == -1)
> +             if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
>                       return 0;
>               break;
>       default:
> diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h
> index ed112320fa2..a5b20eec27a 100644
> --- lib/libc/hidden/sys/time.h
> +++ lib/libc/hidden/sys/time.h
> @@ -20,11 +20,18 @@
>  
>  #include_next <sys/time.h>
>  
> +__BEGIN_HIDDEN_DECLS
> +void _microtime(struct timeval *tvp, struct __timekeep *tk);
> +void _nanotime(struct timespec *tsp, struct __timekeep *tk);
> +void _nanoruntime(struct timespec *ts, struct __timekeep *tk);
> +void _nanouptime(struct timespec *tsp, struct __timekeep *tk);
> +__END_HIDDEN_DECLS
> +
>  PROTO_NORMAL(adjfreq);
>  PROTO_NORMAL(adjtime);
>  PROTO_NORMAL(futimes);
>  PROTO_NORMAL(getitimer);
> -PROTO_NORMAL(gettimeofday);
> +PROTO_WRAP(gettimeofday);
>  PROTO_NORMAL(setitimer);
>  PROTO_NORMAL(settimeofday);
>  PROTO_NORMAL(utimes);
> diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
> index 18c49f8fcb9..44bd8e7c6e3 100644
> --- lib/libc/hidden/time.h
> +++ lib/libc/hidden/time.h
> @@ -18,18 +18,26 @@
>  #ifndef _LIBC_TIME_H_
>  #define      _LIBC_TIME_H_
>  
> +#include <sys/types.h>
> +
>  #include_next <time.h>
>  
>  #if 0
>  extern PROTO_NORMAL(tzname);
>  #endif
>  
> +__BEGIN_HIDDEN_DECLS
> +extern void  *_timekeep;
> +extern uint64_t      (*const tc_get_timecount)(void);
> +uint64_t     tc_get_timecount_md(void);
> +__END_HIDDEN_DECLS
> +
>  PROTO_NORMAL(asctime);
>  PROTO_NORMAL(asctime_r);
>  PROTO_STD_DEPRECATED(clock);
>  PROTO_DEPRECATED(clock_getcpuclockid);
>  PROTO_NORMAL(clock_getres);
> -PROTO_NORMAL(clock_gettime);
> +PROTO_WRAP(clock_gettime);
>  PROTO_NORMAL(clock_settime);
>  PROTO_STD_DEPRECATED(ctime);
>  PROTO_DEPRECATED(ctime_r);
> diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
> index 763e420bb88..9babb28470a 100644
> --- lib/libc/net/res_random.c
> +++ lib/libc/net/res_random.c
> @@ -219,7 +219,7 @@ res_initid(void)
>       if (ru_prf != NULL)
>               arc4random_buf(ru_prf, sizeof(*ru_prf));
>  
> -     clock_gettime(CLOCK_MONOTONIC, &ts);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>       ru_reseed = ts.tv_sec + RU_OUT;
>       ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; 
>  }
> @@ -232,7 +232,7 @@ __res_randomid(void)
>       u_int r;
>       static void *randomid_mutex;
>  
> -     clock_gettime(CLOCK_MONOTONIC, &ts);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>       pid = getpid();
>  
>       _MUTEX_LOCK(&randomid_mutex);
> diff --git lib/libc/rpc/auth_unix.c lib/libc/rpc/auth_unix.c
> index 402d98cede4..917a6d42b8a 100644
> --- lib/libc/rpc/auth_unix.c
> +++ lib/libc/rpc/auth_unix.c
> @@ -121,7 +121,7 @@ authunix_create(char *machname, int uid, int gid, int 
> len, int *aup_gids)
>       /*
>        * fill in param struct from the given params
>        */
> -     (void)gettimeofday(&now,  NULL);
> +     (void)WRAP(gettimeofday)(&now,  NULL);
>       aup.aup_time = now.tv_sec;
>       aup.aup_machname = machname;
>       aup.aup_uid = uid;
> @@ -274,7 +274,7 @@ authunix_refresh(AUTH *auth)
>               goto done;
>  
>       /* update the time and serialize in place */
> -     (void)gettimeofday(&now, NULL);
> +     (void)WRAP(gettimeofday)(&now, NULL);
>       aup.aup_time = now.tv_sec;
>       xdrs.x_op = XDR_ENCODE;
>       XDR_SETPOS(&xdrs, 0);
> diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
> index 8e6ef515b0e..927b4bf2028 100644
> --- lib/libc/rpc/clnt_tcp.c
> +++ lib/libc/rpc/clnt_tcp.c
> @@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
>       pfd[0].events = POLLIN;
>       TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
>       delta = wait;
> -     clock_gettime(CLOCK_MONOTONIC, &start);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>       for (;;) {
>               r = ppoll(pfd, 1, &delta, NULL);
>               save_errno = errno;
>  
> -             clock_gettime(CLOCK_MONOTONIC, &after);
> +             WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>               timespecsub(&start, &after, &duration);
>               timespecsub(&wait, &duration, &delta);
>               if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
> index 68d01674410..92e1d5c350d 100644
> --- lib/libc/rpc/clnt_udp.c
> +++ lib/libc/rpc/clnt_udp.c
> @@ -265,7 +265,7 @@ send_again:
>       reply_msg.acpted_rply.ar_results.where = resultsp;
>       reply_msg.acpted_rply.ar_results.proc = xresults;
>  
> -     clock_gettime(CLOCK_MONOTONIC, &start);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>       for (;;) {
>               switch (ppoll(pfd, 1, &wait, NULL)) {
>               case 0:
> @@ -283,7 +283,7 @@ send_again:
>                       /* FALLTHROUGH */
>               case -1:
>                       if (errno == EINTR) {
> -                             clock_gettime(CLOCK_MONOTONIC, &after);
> +                             WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>                               timespecsub(&after, &start, &duration);
>                               timespecadd(&time_waited, &duration, 
> &time_waited);
>                               if (timespeccmp(&time_waited, &timeout, <))
> diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
> index f9d7a70938f..6c99db84359 100644
> --- lib/libc/rpc/svc_tcp.c
> +++ lib/libc/rpc/svc_tcp.c
> @@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>        * A timeout is fatal for the connection.
>        */
>       delta = wait_per_try;
> -     clock_gettime(CLOCK_MONOTONIC, &start);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>       pfd[0].fd = sock;
>       pfd[0].events = POLLIN;
>       do {
> @@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>               case -1:
>                       if (errno != EINTR)
>                               goto fatal_err;
> -                     clock_gettime(CLOCK_MONOTONIC, &after);
> +                     WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>                       timespecsub(&after, &start, &duration);
>                       timespecsub(&wait_per_try, &duration, &delta);
>                       if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/shlib_version lib/libc/shlib_version
> index 06f98b01084..5fb0770494f 100644
> --- lib/libc/shlib_version
> +++ lib/libc/shlib_version
> @@ -1,4 +1,4 @@
>  major=96
> -minor=0
> +minor=1
>  # note: If changes were made to include/thread_private.h or if system calls
>  # were added/changed then librthread/shlib_version must also be updated.
> diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
> index 34769576ced..d57418d81bf 100644
> --- lib/libc/sys/Makefile.inc
> +++ lib/libc/sys/Makefile.inc
> @@ -12,7 +12,8 @@ SRCS+=      Ovfork.S brk.S ${CERROR} \
>  
>  # glue to offer userland wrappers for some syscalls
>  SRCS+=       posix_madvise.c pthread_sigmask.c \
> -     w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
> +     w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
> +     w_clock_gettime.c w_gettimeofday.c microtime.c
>  
>  # glue for compat with old syscall interfaces.
>  SRCS+=       ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c 
> truncate.c \
> @@ -43,7 +44,7 @@ SRCS+=      ${CANCEL:%=w_%.c} w_pread.c w_preadv.c 
> w_pwrite.c w_pwritev.c
>  ASM= __semctl.o __syscall.o __thrsigdivert.o \
>       access.o acct.o adjfreq.o adjtime.o \
>       bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
> -     clock_getres.o clock_gettime.o clock_settime.o \
> +     clock_getres.o clock_settime.o \
>       dup.o dup2.o dup3.o \
>       execve.o \
>       faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
> @@ -54,7 +55,7 @@ ASM=        __semctl.o __syscall.o __thrsigdivert.o \
>       getgroups.o getitimer.o getpeername.o getpgid.o \
>       getpriority.o getresgid.o getresuid.o \
>       getrlimit.o getrusage.o getsid.o getsockname.o \
> -     getsockopt.o gettimeofday.o ioctl.o \
> +     getsockopt.o ioctl.o \
>       kevent.o kill.o kqueue.o ktrace.o lchown.o \
>       link.o linkat.o listen.o lstat.o madvise.o \
>       minherit.o mkdir.o mkdirat.o mkfifo.o mkfifoat.o \
> @@ -109,7 +110,8 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
>  SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
>  DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
>  
> -HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
> +HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} 
> \
> +     clock_gettime.o gettimeofday.o
>  PHIDDEN=${HIDDEN:.o=.po}
>  SHIDDEN=${HIDDEN:.o=.so}
>  DHIDDEN=${HIDDEN:.o=.do}
> diff --git lib/libc/sys/microtime.c lib/libc/sys/microtime.c
> new file mode 100644
> index 00000000000..6b7b65762e7
> --- /dev/null
> +++ lib/libc/sys/microtime.c
> @@ -0,0 +1,157 @@
> +/*   $OpenBSD$ */
> +/*
> + * Copyright (c) 2000 Poul-Henning Kamp <p...@freebsd.org>
> + * Copyright (c) 2020 Paul Irofti <p...@irofti.net>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/atomic.h>
> +#include <sys/time.h>
> +
> +#include <time.h>
> +
> +/*
> + * Return the difference between the timehands' counter value now and what
> + * was when we copied it to the timehands' offset_count.
> + */
> +static __inline u_int
> +tc_delta(struct __timekeep *tk)
> +{
> +     return ((tc_get_timecount() - tk->th_offset_count) &
> +         tk->tc_counter_mask);
> +}
> +
> +static inline void
> +bintimeaddfrac(const struct bintime *bt, uint64_t x, struct bintime *ct)
> +{
> +     ct->sec = bt->sec;
> +     if (bt->frac > bt->frac + x)
> +             ct->sec++;
> +     ct->frac = bt->frac + x;
> +}
> +
> +static inline void
> +BINTIME_TO_TIMESPEC(const struct bintime *bt, struct timespec *ts)
> +{
> +     ts->tv_sec = bt->sec;
> +     ts->tv_nsec = (long)(((uint64_t)1000000000 * (uint32_t)(bt->frac >> 
> 32)) >> 32);
> +}
> +
> +static inline void
> +BINTIME_TO_TIMEVAL(const struct bintime *bt, struct timeval *tv)
> +{
> +     tv->tv_sec = bt->sec;
> +     tv->tv_usec = (long)(((uint64_t)1000000 * (uint32_t)(bt->frac >> 32)) 
> >> 32);
> +}
> +
> +static void
> +binuptime(struct bintime *bt, struct __timekeep *tk)
> +{
> +     u_int gen;
> +
> +     do {
> +             gen = tk->th_generation;
> +             membar_consumer();
> +             *bt = tk->th_offset;
> +             bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
> +             membar_consumer();
> +     } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +static inline void
> +bintimeadd(const struct bintime *bt, const struct bintime *ct,
> +    struct bintime *dt)
> +{
> +     dt->sec = bt->sec + ct->sec;
> +     if (bt->frac > bt->frac + ct->frac)
> +             dt->sec++;
> +     dt->frac = bt->frac + ct->frac;
> +}
> +
> +static inline void
> +bintimesub(const struct bintime *bt, const struct bintime *ct,
> +    struct bintime *dt)
> +{
> +     dt->sec = bt->sec - ct->sec;
> +     if (bt->frac < bt->frac - ct->frac)
> +             dt->sec--;
> +     dt->frac = bt->frac - ct->frac;
> +}
> +
> +static void
> +binruntime(struct bintime *bt, struct __timekeep *tk)
> +{
> +     u_int gen;
> +
> +     do {
> +             gen = tk->th_generation;
> +             membar_consumer();
> +             bintimeaddfrac(&tk->th_offset, tk->th_scale * tc_delta(tk), bt);
> +             bintimesub(bt, &tk->th_naptime, bt);
> +             membar_consumer();
> +     } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +static void
> +bintime(struct bintime *bt, struct __timekeep *tk)
> +{
> +     u_int gen;
> +
> +     do {
> +             gen = tk->th_generation;
> +             membar_consumer();
> +             *bt = tk->th_offset;
> +             bintimeaddfrac(bt, tk->th_scale * tc_delta(tk), bt);
> +             bintimeadd(bt, &tk->th_boottime, bt);
> +             membar_consumer();
> +     } while (gen == 0 || gen != tk->th_generation);
> +}
> +
> +void
> +_microtime(struct timeval *tvp, struct __timekeep *tk)
> +{
> +     struct bintime bt;
> +
> +     bintime(&bt, tk);
> +     BINTIME_TO_TIMEVAL(&bt, tvp);
> +}
> +
> +void
> +_nanotime(struct timespec *tsp, struct __timekeep *tk)
> +{
> +     struct bintime bt;
> +
> +     bintime(&bt, tk);
> +     BINTIME_TO_TIMESPEC(&bt, tsp);
> +}
> +
> +void
> +_nanoruntime(struct timespec *ts, struct __timekeep *tk)
> +{
> +     struct bintime bt;
> +
> +     binruntime(&bt, tk);
> +     BINTIME_TO_TIMESPEC(&bt, ts);
> +}
> +
> +
> +void
> +_nanouptime(struct timespec *tsp, struct __timekeep *tk)
> +{
> +     struct bintime bt;
> +
> +     binuptime(&bt, tk);
> +     BINTIME_TO_TIMESPEC(&bt, tsp);
> +}
> diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
> new file mode 100644
> index 00000000000..27c504fc285
> --- /dev/null
> +++ lib/libc/sys/w_clock_gettime.c
> @@ -0,0 +1,59 @@
> +/*   $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <p...@irofti.net>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <stdlib.h>
> +#include <time.h>
> +#include <err.h>
> +
> +#include <sys/time.h>
> +
> +int
> +WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
> +{
> +     struct __timekeep *timekeep;
> +     unsigned int seq;
> +
> +     if (_timekeep == NULL)
> +             return clock_gettime(clock_id, tp);
> +     timekeep = _timekeep;
> +
> +     switch (clock_id) {
> +     case CLOCK_REALTIME:
> +             do {
> +                     seq = timekeep->seq;
> +                     _nanotime(tp, timekeep);
> +             } while (seq == 0 || seq != timekeep->seq);
> +             break;
> +     case CLOCK_UPTIME:
> +             do {
> +                     seq = timekeep->seq;
> +                     _nanoruntime(tp, timekeep);
> +             } while (seq == 0 || seq != timekeep->seq);
> +             break;
> +     case CLOCK_MONOTONIC:
> +     case CLOCK_BOOTTIME:
> +             do {
> +                     seq = timekeep->seq;
> +                     _nanouptime(tp, timekeep);
> +             } while (seq == 0 || seq != timekeep->seq);
> +             break;
> +     default:
> +             return clock_gettime(clock_id, tp);
> +     }
> +     return 0;
> +}
> +DEF_WRAP(clock_gettime);
> diff --git lib/libc/sys/w_gettimeofday.c lib/libc/sys/w_gettimeofday.c
> new file mode 100644
> index 00000000000..3cf77e96e37
> --- /dev/null
> +++ lib/libc/sys/w_gettimeofday.c
> @@ -0,0 +1,42 @@
> +/*   $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Robert Nagy <rob...@openbsd.org>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/time.h>
> +
> +int
> +WRAP(gettimeofday)(struct timeval *tp, struct timezone *tzp)
> +{
> +     struct __timekeep *timekeep = _timekeep;
> +     static struct timezone zerotz = { 0, 0 };
> +     unsigned int seq;
> +
> +     if (timekeep == NULL)
> +             return gettimeofday(tp, tzp);
> +
> +     if (tp) {
> +             do {
> +                     seq = timekeep->seq;
> +                     _microtime(tp, timekeep);
> +             } while (seq == 0 || seq != timekeep->seq);
> +     }
> +
> +     if (tzp)
> +             tzp = &zerotz;
> +
> +     return 0;
> +}
> +DEF_WRAP(gettimeofday);
> diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
> index 788890add89..df2239438d2 100644
> --- lib/libc/thread/synch.h
> +++ lib/libc/thread/synch.h
> @@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, 
> const struct timespec *
>       if (abs == NULL)
>               return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
>  
> -     if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
> +     if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
>               return (EINVAL);
>  
>       rel.tv_sec = abs->tv_sec - rel.tv_sec;
> diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
> index 9b5b8eb3acf..59bc923a6fb 100644
> --- sys/kern/exec_elf.c
> +++ sys/kern/exec_elf.c
> @@ -124,7 +124,7 @@ extern char *syscallnames[];
>  /*
>   * How many entries are in the AuxInfo array we pass to the process?
>   */
> -#define ELF_AUX_ENTRIES      8
> +#define ELF_AUX_ENTRIES      9
>  
>  /*
>   * This is the OpenBSD ELF emul
> @@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
>               a->au_v = ap->arg_entry;
>               a++;
>  
> +             a->au_id = AUX_openbsd_timekeep;
> +             a->au_v = p->p_p->ps_timekeep;
> +             a++;
> +
>               a->au_id = AUX_null;
>               a->au_v = 0;
>               a++;
> diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
> index 20480c2fc28..15bf4db6fbd 100644
> --- sys/kern/kern_exec.c
> +++ sys/kern/kern_exec.c
> @@ -64,6 +64,11 @@
>  #include <uvm/uvm_extern.h>
>  #include <machine/tcb.h>
>  
> +#include <sys/time.h>
> +
> +struct uvm_object *timekeep_object;
> +struct __timekeep* timekeep;
> +
>  void unveil_destroy(struct process *ps);
>  
>  const struct kmem_va_mode kv_exec = {
> @@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
>   */
>  int exec_sigcode_map(struct process *, struct emul *);
>  
> +/*
> + * Map the shared timekeep page.
> + */
> +int exec_timekeep_map(struct process *);
> +
>  /*
>   * If non-zero, stackgap_random specifies the upper limit of the random gap 
> size
>   * added to the fixed stack position. Must be n^2.
> @@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
>       /* map the process's signal trampoline code */
>       if (exec_sigcode_map(pr, pack.ep_emul))
>               goto free_pack_abort;
> +     /* map the process's timekeep page */
> +     if (exec_timekeep_map(pr))
> +             goto free_pack_abort;
>  
>  #ifdef __HAVE_EXEC_MD_MAP
>       /* perform md specific mappings that process might need */
> @@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e)
>  
>       return (0);
>  }
> +
> +int
> +exec_timekeep_map(struct process *pr)
> +{
> +     size_t timekeep_sz = sizeof(struct __timekeep);
> +
> +     /*
> +      * Similar to the sigcode object, except that there is a single timekeep
> +      * object, and not one per emulation.
> +      */
> +     if (timekeep_object == NULL) {
> +             vaddr_t va;
> +
> +             timekeep_object = uao_create(timekeep_sz, 0);
> +             uao_reference(timekeep_object);
> +
> +             if (uvm_map(kernel_map, &va, round_page(timekeep_sz), 
> timekeep_object,
> +                 0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | 
> PROT_WRITE,
> +                 MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
> +                     uao_detach(timekeep_object);
> +                     return (ENOMEM);
> +             }
> +
> +             timekeep = (struct __timekeep *)va;
> +             timekeep->major = 0;
> +             timekeep->minor = 0;
> +
> +             timekeep->seq = 0;
> +     }
> +
> +     uao_reference(timekeep_object);
> +     if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, 
> round_page(timekeep_sz),
> +         timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
> +         MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
> +             uao_detach(timekeep_object);
> +             return (ENOMEM);
> +     }
> +
> +     return (0);
> +}
> diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
> index 4b9eedf50b9..1a6db0102c3 100644
> --- sys/kern/kern_tc.c
> +++ sys/kern/kern_tc.c
> @@ -35,6 +35,7 @@
>  #include <sys/queue.h>
>  #include <sys/malloc.h>
>  #include <dev/rndvar.h>
> +#include <sys/time.h>
>  
>  /*
>   * A large step happens on boot.  This constant detects such steps.
> @@ -480,6 +481,34 @@ tc_setclock(const struct timespec *ts)
>  #endif
>  }
>  
> +void
> +tc_update_timekeep(void)
> +{
> +     struct timehands *th;
> +     u_int gen;
> +
> +     if (timekeep == NULL)
> +             return;
> +
> +     atomic_inc_int(&timekeep->seq);
> +
> +     do {
> +             th = timehands;
> +             gen = th->th_generation;
> +             membar_consumer();
> +             timekeep->th_scale = th->th_scale;
> +             timekeep->th_offset_count = th->th_offset_count;
> +             timekeep->th_offset = th->th_offset;
> +             timekeep->th_naptime = th->th_naptime;
> +             timekeep->th_boottime = th->th_boottime;
> +             timekeep->th_generation = th->th_generation;
> +             timekeep->tc_counter_mask = th->th_counter->tc_counter_mask;
> +             membar_consumer();
> +     } while (gen == 0 || gen != th->th_generation);
> +
> +     return;
> +}
> +
>  /*
>   * Initialize the next struct timehands in the ring and make
>   * it the active timehands.  Along the way we might switch to a different
> @@ -632,6 +661,8 @@ tc_windup(struct bintime *new_boottime, struct bintime 
> *new_offset,
>       time_uptime = th->th_offset.sec;
>       membar_producer();
>       timehands = th;
> +
> +     tc_update_timekeep();
>  }
>  
>  /* Report or change the active timecounter hardware. */
> diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
> index a40e0510273..f55b75f1e84 100644
> --- sys/sys/exec_elf.h
> +++ sys/sys/exec_elf.h
> @@ -691,7 +691,8 @@ enum AuxID {
>       AUX_sun_uid = 2000,             /* euid */
>       AUX_sun_ruid = 2001,            /* ruid */
>       AUX_sun_gid = 2002,             /* egid */
> -     AUX_sun_rgid = 2003             /* rgid */
> +     AUX_sun_rgid = 2003,            /* rgid */
> +     AUX_openbsd_timekeep = 2004,    /* userland clock_gettime */
>  };
>  
>  struct elf_args {
> diff --git sys/sys/proc.h sys/sys/proc.h
> index 357c0c0d52c..93a79a220db 100644
> --- sys/sys/proc.h
> +++ sys/sys/proc.h
> @@ -248,6 +248,8 @@ struct process {
>       u_int   ps_rtableid;            /* Process routing table/domain. */
>       char    ps_nice;                /* Process "nice" value. */
>  
> +     vaddr_t ps_timekeep;            /* User pointer to timekeep */
> +
>       struct uprof {                  /* profile arguments */
>               caddr_t pr_base;        /* buffer base */
>               size_t  pr_size;        /* buffer size */
> diff --git sys/sys/time.h sys/sys/time.h
> index e758a64ce07..0b48f65f3f1 100644
> --- sys/sys/time.h
> +++ sys/sys/time.h
> @@ -163,15 +163,30 @@ struct clockinfo {
>  };
>  #endif /* __BSD_VISIBLE */
>  
> -#if defined(_KERNEL) || defined(_STANDALONE)
> -#include <sys/_time.h>
> -
>  /* Time expressed as seconds and fractions of a second + operations on it. */
>  struct bintime {
>       time_t  sec;
>       uint64_t frac;
>  };
>  
> +struct __timekeep {
> +     uint8_t major;          /* version major number */
> +     uint8_t minor;          /* version minor number */
> +
> +     volatile unsigned int seq;      /* synchronization */
> +
> +     u_int64_t               th_scale;               /* [w] */
> +     u_int                   th_offset_count;        /* [w] */
> +     struct bintime          th_offset;              /* [w] */
> +     struct bintime          th_naptime;             /* [w] */
> +     struct bintime          th_boottime;            /* [tw] */
> +     volatile u_int          th_generation;          /* [w] */
> +     u_int                   tc_counter_mask;        /* [I] */
> +};
> +
> +#if defined(_KERNEL) || defined(_STANDALONE)
> +#include <sys/_time.h>
> +
>  #define bintimecmp(btp, ctp, cmp)                                    \
>       ((btp)->sec == (ctp)->sec ?                                     \
>           (btp)->frac cmp (ctp)->frac :                               \
> @@ -396,6 +411,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
>       return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
>  }
>  
> +extern struct uvm_object *timekeep_object;
> +extern struct __timekeep *timekeep;
>  #else /* !_KERNEL */
>  #include <time.h>
>  
> 
> 

Reply via email to