Hi,

Here's a first attempt at adding clock_nanosleep(2).

Its use cases are admittedly more niche than nanosleep(2),
and indeed there are just a handful of spots in base where
I think it would be an improvement over existing code, but
it's definitely an improvement, particularly in simplicity,
where applicable.

This implementation introduces a rwlock in tc_setrealtimeclock()
and wakes threads when it exits the critical section, so absolute
CLOCK_REALTIME timeouts never miss a clock jump.

I feel that this is more of a hack than adjusting timeouts from
hardclock(9), and something like that will probably be the
better solution if other absolute CLOCK_REALTIME timeouts are made
sensitive to jumps in the future, but I wanted to start here with
something minimally invasive.  I'm still unsure if making absolute
timeouts sensitive to clock jumps is even worth it, particularly in
cases where there is no portable alternative clock choice, like with
pthread_mutex_timedlock(3).

(Maybe of note is that this was guenther@'s approach several years
ago [1], but that patch did not land.)

... but with clock_nanosleep(2) you can use any clock, so I think
it's a perfectly valid feature with no real downsides to inclusion.

The patch also generalizes rwsleep(9) to cover both write and
read locks.  This seems like an obvious extension preferable to
adding yet another *sleep() wrapper just for read locks.

The patch puts the sleeps in tick-loops to handle arbitrary
timeouts for both nanosleep(2) and clock_nanosleep(2), removing
nanosleep's 100-million second upper bound.  Starting here, I want
to remove the 100-million second upper bound from system calls with
timeouts because:

  (a) It's a lie: 100 million seconds at 100 Hz is 10 billion ticks,
      way more than a signed 32-bit int (about 2.1 billion) can represent.

  (b) It's one less platform-specific thing for userspace to
      check; particularly cross-platform high-level languages,
      effectively all of which have an interface to nanosleep(2).

      Lots of sleeps don't check the return code: eliminating
      the 100-million second gotcha case makes debugging and
      reasoning about code easier.

Looking for feedback that I can spruce things up with and then
ask for OKs later (assuming there's no objection to including
the interface itself).

Thoughts & feedback?

--
Scott Cheloha

[1] https://marc.info/?l=openbsd-tech&m=128245846500419&w=2

Index: include/time.h
===================================================================
RCS file: /cvs/src/include/time.h,v
retrieving revision 1.30
diff -u -p -r1.30 time.h
--- include/time.h      5 Sep 2017 03:16:13 -0000       1.30
+++ include/time.h      26 May 2018 22:42:20 -0000
@@ -165,6 +165,7 @@ int nanosleep(const struct timespec *, s
 
 #if __POSIX_VISIBLE >= 200112
 int clock_getcpuclockid(pid_t, clockid_t *);
+int clock_nanosleep(clockid_t, int, const struct timespec *, struct timespec *);
 #endif
 
 #if __POSIX_VISIBLE >= 200809
Index: lib/libc/Symbols.list
===================================================================
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.62
diff -u -p -r1.62 Symbols.list
--- lib/libc/Symbols.list       5 Dec 2017 13:45:31 -0000       1.62
+++ lib/libc/Symbols.list       26 May 2018 22:42:21 -0000
@@ -58,6 +58,7 @@ _thread_sys_chown
 _thread_sys_chroot
 _thread_sys_clock_getres
 _thread_sys_clock_gettime
+_thread_sys_clock_nanosleep
 _thread_sys_clock_settime
 _thread_sys_close
 _thread_sys_closefrom
@@ -254,6 +255,7 @@ chown
 chroot
 clock_getres
 clock_gettime
+clock_nanosleep
 clock_settime
 close
 closefrom
Index: lib/libc/hidden/time.h
===================================================================
RCS file: /cvs/src/lib/libc/hidden/time.h,v
retrieving revision 1.5
diff -u -p -r1.5 time.h
--- lib/libc/hidden/time.h      5 Sep 2017 03:16:13 -0000       1.5
+++ lib/libc/hidden/time.h      26 May 2018 22:42:21 -0000
@@ -30,6 +30,7 @@ PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
 PROTO_NORMAL(clock_gettime);
+PROTO_CANCEL(clock_nanosleep);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
Index: lib/libc/sys/Makefile.inc
===================================================================
RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
retrieving revision 1.154
diff -u -p -r1.154 Makefile.inc
--- lib/libc/sys/Makefile.inc   12 Jan 2018 04:36:12 -0000      1.154
+++ lib/libc/sys/Makefile.inc   26 May 2018 22:42:21 -0000
@@ -27,7 +27,7 @@ SRCS+=        canceled.c
 
 # syscalls that would be normal...except for cancellation or SIGTHR
 CANCEL=        accept accept4 \
-       close closefrom connect \
+       clock_nanosleep close closefrom connect \
        fcntl fsync \
        msgrcv msgsnd msync \
        nanosleep \
@@ -175,8 +175,8 @@ ${HIDDEN}:  ; @${GENERATE.rsyscall_hidde
 
 MAN+=  __get_tcb.2 __thrsigdivert.2 __thrsleep.2 _exit.2 accept.2 \
        access.2 acct.2 adjfreq.2 adjtime.2 bind.2 brk.2 chdir.2 \
-       chflags.2 chmod.2 chown.2 chroot.2 clock_gettime.2 close.2 \
-       closefrom.2 connect.2 dup.2 execve.2 fcntl.2 fhopen.2 flock.2 \
+       chflags.2 chmod.2 chown.2 chroot.2 clock_gettime.2 clock_nanosleep.2 \
+       close.2 closefrom.2 connect.2 dup.2 execve.2 fcntl.2 fhopen.2 flock.2 \
        fork.2 fsync.2 futex.2 getentropy.2 getdents.2 getdtablecount.2 \
        getfh.2 getfsstat.2 getgid.2 getgroups.2 getitimer.2 getlogin.2 \
        getpeername.2 getpgrp.2 getpid.2 getpriority.2 getrlimit.2 \
Index: lib/libc/sys/clock_nanosleep.2
===================================================================
RCS file: lib/libc/sys/clock_nanosleep.2
diff -N lib/libc/sys/clock_nanosleep.2
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/libc/sys/clock_nanosleep.2      26 May 2018 22:42:21 -0000
@@ -0,0 +1,241 @@
+.\" $OpenBSD$
+.\"
+.\" Copyright (c) 2017 Scott Cheloha
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate$
+.Dt CLOCK_NANOSLEEP 2
+.Os
+.Sh NAME
+.Nm clock_nanosleep
+.Nd high resolution sleep with a specifiable clock
+.Sh SYNOPSIS
+.In time.h
+.Ft int
+.Fn clock_nanosleep "clockid_t clock_id" "int flags" "const struct timespec *timeout" "struct timespec *remainder"
+.Sh DESCRIPTION
+Absent any
+.Fa flags ,
+.Fn clock_nanosleep
+suspends execution of the calling thread for the duration specified by
+.Fa timeout
+as measured by the clock
+.Fa clock_id .
+.Pp
+If
+.Fa flags
+is set to
+.Dv TIMER_ABSTIME ,
+.Fn clock_nanosleep
+suspends execution of the calling thread until the absolute time
+counted by the clock
+.Fa clock_id
+reaches or exceeds the absolute time specified by
+.Fa timeout .
+.Pp
+The following values of
+.Fa clock_id
+are supported:
+.Pp
+.Bl -tag -width indent -offset indent -compact
+.It Dv CLOCK_REALTIME
+.It Dv CLOCK_MONOTONIC
+.It Dv CLOCK_UPTIME
+.It Dv CLOCK_BOOTTIME
+.El
+.Pp
+An unmasked signal will cause
+.Fn clock_nanosleep
+to terminate the sleep early,
+regardless of the
+.Dv SA_RESTART
+value on the interrupting signal.
+.Sh RETURN VALUES
+.Fn clock_nanosleep
+returns the value 0 if the requested duration has elapsed or the
+given absolute time has passed.
+.Pp
+If an unmasked signal is delivered before such time,
+the value \-1 is returned and the global variable
+.Va errno
+is set to note the interruption.
+In this case,
+if
+.Dv TIMER_ABSTIME
+is not set in
+.Fa flags
+and
+.Fa remainder
+is
+.Pf non- Dv NULL ,
+the timespec structure referenced by
+.Fa remainder
+is updated to contain the unslept amount:
+the requested duration minus the duration actually slept.
+.Pp
+Otherwise, the value \-1 is returned and the global variable
+.Va errno
+is set to indicate the error.
+.Sh EXAMPLES
+A typical delay loop with
+.Fn nanosleep
+looks like this:
+.Bd -literal
+struct timespec to = { 60, 0 };
+
+while (nanosleep(&to, &to) == -1) {
+       if (errno != EINTR)
+               err(1, "nanosleep");
+}
+.Ed
+.Pp
+The equivalent implementation with
+.Fn clock_nanosleep
+is very similar:
+.Bd -literal
+while (clock_nanosleep(CLOCK_MONOTONIC, 0, &to, &to) == -1) {
+       [...]
+}
+.Ed
+.Pp
+If we introduce work initiated by a signal into the delay loop and want to
+keep our original sixty second deadline,
+however,
+the
+.Xr nanosleep 2
+code becomes more awkward:
+.Bd -literal
+struct timespec to = { 60, 0 };
+struct timespec after, before, elapsed;
+
+while (nanosleep(&to, &to) == -1) {
+       if (errno != EINTR)
+               err(1, "nanosleep");
+       clock_gettime(CLOCK_MONOTONIC, &before);
+       work();
+       clock_gettime(CLOCK_MONOTONIC, &after);
+       timespecsub(&after, &before, &elapsed);
+       timespecsub(&to, &elapsed, &to);
+       if (to.tv_sec < 0)
+               timespecclear(&to);
+}
+.Ed
+.Pp
+The above example also does not account for time spent in
+signal-handling code,
+which could be substantial if signal volume is high.
+.Pp
+.Fn clock_nanosleep
+simplifies this with absolute timeouts,
+which can keep the total time spent in a delay loop much closer to a
+given maximum duration while eliminating the need for tedious measurement.
+.Pp
+The following code will tend to keep the total delay closer to sixty
+seconds than the prior example and is notably simpler:
+.Bd -literal
+struct timespec now, to;
+
+clock_gettime(CLOCK_MONOTONIC, &now);
+to.tv_sec = now.tv_sec + 60;
+to.tv_nsec = now.tv_nsec;
+
+while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &to, NULL) == -1) {
+       if (errno != EINTR)
+               err(1, "clock_nanosleep");
+       work();
+}
+.Ed
+.Sh ERRORS
+.Fn clock_nanosleep
+will return early if:
+.Bl -tag -width Er
+.It Bq Er EINTR
+A signal was delivered.
+.El
+.Pp
+.Fn clock_nanosleep
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+.Fa clock_id
+specified an invalid clock or
+.Dv CLOCK_THREAD_CPUTIME_ID .
+.It Bq Er EINVAL
+.Fa flags
+contained a flag other than
+.Dv TIMER_ABSTIME .
+.It Bq Er EINVAL
+.Fa timeout
+specified a nanosecond value less than zero
+or greater than or equal to 1000 million.
+.It Bq Er EINVAL
+.Dv TIMER_ABSTIME
+was not set in
+.Fa flags
+and
+.Fa timeout
+specified a second value less than zero.
+.It Bq Er EFAULT
+Either
+.Fa timeout
+or
+.Fa remainder
+pointed to memory that was not a valid part of the process address space.
+.It Bq Er ENOTSUP
+.Fa clock_id
+specified an unsupported clock.
+.El
+.Sh SEE ALSO
+.Xr sleep 1 ,
+.Xr clock_gettime 2 ,
+.Xr gettimeofday 2 ,
+.Xr nanosleep 2 ,
+.Xr sleep 3
+.Sh STANDARDS
+The
+.Fn clock_nanosleep
+function conforms to
+.St -p1003.1-2008 .
+.Pp
+Support for the
+.Dv CLOCK_BOOTTIME
+and
+.Dv CLOCK_UPTIME
+clocks is an extension to that specification.
+.Sh HISTORY
+The
+.Fn clock_nanosleep
+system call has been available since
+.Ox 6.4 .
+.Sh CAVEATS
+If
+.Fa clock_id
+is
+.Dv CLOCK_REALTIME
+and
+.Dv TIMER_ABSTIME
+is set in
+.Fa flags ,
+the duration of the sleep may deviate from what the caller intended as the
+.Dv CLOCK_REALTIME
+clock is subject to the discontinuous jumps caused by
+.Xr clock_settime 2
+and
+.Xr settimeofday 2 .
+In such a case, a backwards change to the
+.Dv CLOCK_REALTIME
+clock will extend the maximum sleep duration and a forward change
+will shorten it.
+.Pp
+The other supported clocks are not subject to discontinuous jumps.
Index: lib/libc/sys/nanosleep.2
===================================================================
RCS file: /cvs/src/lib/libc/sys/nanosleep.2,v
retrieving revision 1.15
diff -u -p -r1.15 nanosleep.2
--- lib/libc/sys/nanosleep.2    31 May 2015 23:54:25 -0000      1.15
+++ lib/libc/sys/nanosleep.2    26 May 2018 22:42:21 -0000
@@ -76,8 +76,8 @@ to the corresponding value.
 was interrupted by the delivery of a signal.
 .It Bq Er EINVAL
 .Fa timeout
-specified a nanosecond value less than zero or greater than 1000 million,
-or a second value less than zero or greater than 100 million.
+specified a nanosecond value less than zero
+or greater than or equal to 1000 million.
 .It Bq Er EFAULT
 Either
 .Fa timeout
@@ -87,6 +87,7 @@ points to memory that is not a valid par
 .El
 .Sh SEE ALSO
 .Xr sleep 1 ,
+.Xr clock_nanosleep 2 ,
 .Xr sleep 3
 .Sh STANDARDS
 The
Index: lib/libc/sys/w_clock_nanosleep.c
===================================================================
RCS file: lib/libc/sys/w_clock_nanosleep.c
diff -N lib/libc/sys/w_clock_nanosleep.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/libc/sys/w_clock_nanosleep.c    26 May 2018 22:42:21 -0000
@@ -0,0 +1,32 @@
+/*     $OpenBSD$ */
+/*
+ * Copyright (c) 2015 Philip Guenther <guenther@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <time.h>
+#include "cancel.h"
+
+int
+clock_nanosleep(clockid_t clock_id, int flags, const struct timespec *timeout,
+    struct timespec *remainder)
+{
+       int ret;
+
+       ENTER_CANCEL_POINT(1);
+       ret = HIDDEN(clock_nanosleep)(clock_id, flags, timeout, remainder);
+       LEAVE_CANCEL_POINT(1);
+       return (ret);
+}
+DEF_CANCEL(clock_nanosleep);
Index: lib/libpthread/man/pthread_testcancel.3
===================================================================
RCS file: /cvs/src/lib/libpthread/man/pthread_testcancel.3,v
retrieving revision 1.16
diff -u -p -r1.16 pthread_testcancel.3
--- lib/libpthread/man/pthread_testcancel.3     31 Aug 2014 20:23:10 -0000      1.16
+++ lib/libpthread/man/pthread_testcancel.3     26 May 2018 22:42:21 -0000
@@ -101,6 +101,7 @@ type will be in effect.
 Cancellation points will occur when a thread is executing the following
 base interfaces:
 .Fn accept ,
+.Fn clock_nanosleep ,
 .Fn close ,
 .Fn connect ,
 .Fn creat ,
Index: share/man/man9/tsleep.9
===================================================================
RCS file: /cvs/src/share/man/man9/tsleep.9,v
retrieving revision 1.11
diff -u -p -r1.11 tsleep.9
--- share/man/man9/tsleep.9     13 Sep 2016 08:32:44 -0000      1.11
+++ share/man/man9/tsleep.9     26 May 2018 22:42:21 -0000
@@ -47,7 +47,7 @@
 .Ft int
 .Fn msleep "void *ident" "struct mutex *mtx" "int priority" "const char *wmesg" "int timo"
 .Ft int
-.Fn rwsleep "void *ident" "struct rwlock *wl" "int priority" "const char *wmesg" "int timo"
+.Fn rwsleep "void *ident" "struct rwlock *rwl" "int priority" "const char *wmesg" "int timo"
 .Ft void
 .Fn wakeup "void *ident"
 .Ft void
@@ -155,10 +155,10 @@ function behaves just like
 .Fn tsleep ,
 but takes an additional argument:
 .Bl -tag -width priority
-.It Fa wl
-A write lock that will be unlocked when the process is safely
+.It Fa rwl
+A read- or write-lock that will be unlocked when the process is safely
 on the sleep queue.
-The write lock will be relocked at the end of rwsleep unless the
+The lock will be relocked at the end of rwsleep unless the
 .Dv PNORELOCK
 flag is set in the
 .Fa priority
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.94
diff -u -p -r1.94 kern_clock.c
--- sys/kern/kern_clock.c       14 May 2018 12:31:21 -0000      1.94
+++ sys/kern/kern_clock.c       26 May 2018 22:42:21 -0000
@@ -45,6 +45,7 @@
 #include <sys/proc.h>
 #include <sys/user.h>
 #include <sys/resourcevar.h>
+#include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/sysctl.h>
 #include <sys/sched.h>
@@ -116,6 +117,9 @@ initclocks(void)
        softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
        if (softclock_si == NULL)
                panic("initclocks: unable to register softclock intr");
+
+       /* XXX where should this be done? */
+       rw_init(&rtc_lock, "rtc_lock");
 
        ticks = INT_MAX - (15 * 60 * hz);
        jiffies = ULONG_MAX - (10 * 60 * hz);
Index: sys/kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.144
diff -u -p -r1.144 kern_synch.c
--- sys/kern/kern_synch.c       24 Apr 2018 16:28:42 -0000      1.144
+++ sys/kern/kern_synch.c       26 May 2018 22:42:21 -0000
@@ -236,31 +236,32 @@ msleep(const volatile void *ident, struc
  * entered the sleep queue we drop the it. After sleeping we re-lock.
  */
 int
-rwsleep(const volatile void *ident, struct rwlock *wl, int priority,
+rwsleep(const volatile void *ident, struct rwlock *rwl, int priority,
     const char *wmesg, int timo)
 {
        struct sleep_state sls;
-       int error, error1;
+       int error, error1, status;
        WITNESS_SAVE_DECL(lock_fl);
 
        KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
-       rw_assert_wrlock(wl);
+       rw_assert_anylock(rwl);
+       status = rw_status(rwl);
 
        sleep_setup(&sls, ident, priority, wmesg);
        sleep_setup_timeout(&sls, timo);
        sleep_setup_signal(&sls, priority);
 
-       WITNESS_SAVE(&wl->rwl_lock_obj, lock_fl);
+       WITNESS_SAVE(&rwl->rwl_lock_obj, lock_fl);
 
-       rw_exit_write(wl);
+       rw_exit(rwl);
 
        sleep_finish(&sls, 1);
        error1 = sleep_finish_timeout(&sls);
        error = sleep_finish_signal(&sls);
 
        if ((priority & PNORELOCK) == 0) {
-               rw_enter_write(wl);
-               WITNESS_RESTORE(&wl->rwl_lock_obj, lock_fl);
+               rw_enter(rwl, status);
+               WITNESS_RESTORE(&rwl->rwl_lock_obj, lock_fl);
        }
 
        /* Signal errors are higher priority than timeouts. */
Index: sys/kern/kern_tc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_tc.c,v
retrieving revision 1.32
diff -u -p -r1.32 kern_tc.c
--- sys/kern/kern_tc.c  28 Apr 2018 15:44:59 -0000      1.32
+++ sys/kern/kern_tc.c  26 May 2018 22:42:21 -0000
@@ -24,6 +24,7 @@
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/timeout.h>
+#include <sys/rwlock.h>                /* XXX how do these get sorted in-kernel? */
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
@@ -110,6 +111,10 @@ struct bintime naptime;
 static struct bintime boottimebin;
 static int timestepwarnings;
 
+/* XXX where do these belong? */
+int rtc_chan;
+struct rwlock rtc_lock;
+
 void tc_windup(void);
 
 /*
@@ -291,10 +296,10 @@ tc_getfrequency(void)
        return (timehands->th_counter->tc_frequency);
 }
 
+
 /*
  * Step our concept of UTC, aka the realtime clock.
  * This is done by modifying our estimate of when we booted.
- * XXX: not locked.
  */
 void
 tc_setrealtimeclock(struct timespec *ts)
@@ -302,12 +307,18 @@ tc_setrealtimeclock(struct timespec *ts)
        struct timespec ts2;
        struct bintime bt, bt2;
 
+       rw_enter_write(&rtc_lock);
+
        binuptime(&bt2);
        timespec2bintime(ts, &bt);
        bintime_sub(&bt, &bt2);
        bintime_add(&bt2, &boottimebin);
        boottimebin = bt;
        bintime2timespec(&bt, &boottime);
+
+       wakeup(&rtc_chan);
+       rw_exit_write(&rtc_lock);
+
        enqueue_randomness(ts->tv_sec);
 
        /* XXX fiddle all the little crinkly bits around the fiords... */
Index: sys/kern/kern_time.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_time.c,v
retrieving revision 1.102
diff -u -p -r1.102 kern_time.c
--- sys/kern/kern_time.c        22 May 2018 18:33:41 -0000      1.102
+++ sys/kern/kern_time.c        26 May 2018 22:42:21 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_time.c,v 1.102 2018/05/22 18:33:41 cheloha Exp $ */
+/*     $OpenBSD: kern_time.c,v 1.101 2018/02/19 08:59:52 mpi Exp $     */
 /*     $NetBSD: kern_time.c,v 1.20 1996/02/18 11:57:06 fvdl Exp $      */
 
 /*
@@ -37,6 +37,7 @@
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
+#include <sys/rwlock.h>                /* XXX how do these get sorted in-kernel? */
 #include <sys/ktrace.h>
 #include <sys/vnode.h>
 #include <sys/signalvar.h>
@@ -258,66 +259,166 @@ sys_clock_getres(struct proc *p, void *v
 }
 
 int
-sys_nanosleep(struct proc *p, void *v, register_t *retval)
+nanosleep_absolute(struct proc *p, clockid_t clock_id,
+    const struct timespec *deadline)
 {
+       struct timespec diff, now;
        static int nanowait;
-       struct sys_nanosleep_args/* {
-               syscallarg(const struct timespec *) rqtp;
-               syscallarg(struct timespec *) rmtp;
-       } */ *uap = v;
-       struct timespec rqt, rmt;
-       struct timespec sts, ets;
-       struct timespec *rmtp;
-       int error, error1;
-
-       rmtp = SCARG(uap, rmtp);
-       error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
-       if (error)
-               return (error);
-#ifdef KTRACE
-        if (KTRPOINT(p, KTR_STRUCT)) {
-               KERNEL_LOCK();
-               ktrreltimespec(p, &rqt);
-               KERNEL_UNLOCK();
-       }
-#endif
-
-       if (rqt.tv_sec > 100000000 || timespecfix(&rqt))
-               return (EINVAL);
+       int error;
 
-       if (rmtp)
-               getnanouptime(&sts);
+       error = EWOULDBLOCK;
 
-       error = tsleep(&nanowait, PWAIT | PCATCH, "nanosleep",
-           MAX(1, tstohz(&rqt)));
-       if (error == ERESTART)
-               error = EINTR;
-       if (error == EWOULDBLOCK)
-               error = 0;
+       if (clock_id == CLOCK_REALTIME)
+               rw_enter_read(&rtc_lock);
+       clock_gettime(p, clock_id, &now);
+       while (timespeccmp(&now, deadline, <) && error == EWOULDBLOCK) {
+               timespecsub(deadline, &now, &diff);
+               if (clock_id == CLOCK_REALTIME) {
+                       error = rwsleep(&rtc_chan, &rtc_lock, PWAIT | PCATCH,
+                           "nanosleep", MAX(1, tstohz(&diff)));
+               } else {
+                       error = tsleep(&nanowait, PWAIT | PCATCH, "nanosleep",
+                           MAX(1, tstohz(&diff)));
+               }
+               if (error == 0)
+                       error = EWOULDBLOCK;
+               clock_gettime(p, clock_id, &now);
+       }
+       if (clock_id == CLOCK_REALTIME)
+               rw_exit_read(&rtc_lock);
+       return error;
+}
 
-       if (rmtp) {
-               getnanouptime(&ets);
+int
+nanosleep_relative(struct proc *p, clockid_t clock_id,
+    const struct timespec *timeout, struct timespec *remainder)
+{
+       struct timespec after, before, elapsed, rmt;
+       clockid_t effective_clock;
+       static int nanowait;
+       int error;
 
-               memset(&rmt, 0, sizeof(rmt));
-               timespecsub(&ets, &sts, &sts);
-               timespecsub(&rqt, &sts, &rmt);
+       switch (clock_id) {
+       case CLOCK_REALTIME:
+       case CLOCK_MONOTONIC:
+       case CLOCK_BOOTTIME:
+               effective_clock = CLOCK_MONOTONIC;
+               break;
+       case CLOCK_UPTIME:
+               effective_clock = CLOCK_UPTIME;
+               break;
+       default:
+               return EINVAL;
+       }
+       rmt = *timeout;
+       error = EWOULDBLOCK;
 
+       while (timespecisset(&rmt) && error == EWOULDBLOCK) {
+               clock_gettime(p, effective_clock, &before);
+               error = tsleep(&nanowait, PWAIT | PCATCH, "nanosleep",
+                   MAX(1, tstohz(&rmt)));
+               clock_gettime(p, effective_clock, &after);
+               timespecsub(&after, &before, &elapsed);
+               timespecsub(&rmt, &elapsed, &rmt);
                if (rmt.tv_sec < 0)
                        timespecclear(&rmt);
+       }
+       if (remainder != NULL)
+               *remainder = rmt;
+       return error;
+}
+
+int
+clock_nanosleep(struct proc *p, clockid_t clock_id, int flags,
+    const struct timespec *rqtp, struct timespec *rmtp)
+{
+       struct timespec rqt, rmt;
+       int copyout_error, error;
+
+       switch (clock_id) {
+       case CLOCK_REALTIME:
+       case CLOCK_MONOTONIC:
+       case CLOCK_UPTIME:
+       case CLOCK_BOOTTIME:
+               break;
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return ENOTSUP;
+       case CLOCK_THREAD_CPUTIME_ID:
+       default:
+               return EINVAL;
+       }
+       if (flags & ~TIMER_ABSTIME)
+               return EINVAL;
+       error = copyin(rqtp, &rqt, sizeof(rqt));
+       if (error)
+               return error;
 
-               error1 = copyout(&rmt, rmtp, sizeof(rmt));
-               if (error1 != 0)
-                       error = error1;
+       if (flags & TIMER_ABSTIME) {
+               if (rqt.tv_nsec < 0 || rqt.tv_nsec >= 1000000000)
+                       return EINVAL;
 #ifdef KTRACE
-               if (error1 == 0 && KTRPOINT(p, KTR_STRUCT)) {
+               if (KTRPOINT(p, KTR_STRUCT)) {
                        KERNEL_LOCK();
-                       ktrreltimespec(p, &rmt);
+                       ktrabstimespec(p, &rqt);
                        KERNEL_UNLOCK();
                }
 #endif
+               error = nanosleep_absolute(p, clock_id, &rqt);
+       } else {
+               if (rqt.tv_sec < 0 ||
+                   rqt.tv_nsec < 0 || rqt.tv_nsec >= 1000000000)
+                       return (EINVAL);
+#ifdef KTRACE
+               if (KTRPOINT(p, KTR_STRUCT)) {
+                       KERNEL_LOCK();
+                       ktrreltimespec(p, &rqt);
+                       KERNEL_UNLOCK();
+               }
+#endif
+               error = nanosleep_relative(p, clock_id, &rqt,
+                   (rmtp != NULL) ? &rmt : NULL);
+               if (rmtp != NULL) {
+                       copyout_error = copyout(&rmt, rmtp, sizeof(rmt));
+                       if (copyout_error)
+                               error = copyout_error;
+#ifdef KTRACE
+                       if (copyout_error == 0 && KTRPOINT(p, KTR_STRUCT)) {
+                               KERNEL_LOCK();
+                               ktrreltimespec(p, &rmt);
+                               KERNEL_UNLOCK();
+                       }
+#endif
+               }
        }
-
+       if (error == ERESTART)
+               error = EINTR;
+       else if (error == EWOULDBLOCK)
+               error = 0;
        return error;
+}
+
+int
+sys_clock_nanosleep(struct proc *p, void *v, register_t *retval)
+{
+       struct sys_clock_nanosleep_args /* {
+               syscallarg(clockid_t) clock_id;
+               syscallarg(int) flags;
+               syscallarg(const struct timespec *) rqtp;
+               syscallarg(struct timespec *) rmtp;
+       } */ *uap = v;
+       return clock_nanosleep(p, SCARG(uap, clock_id), SCARG(uap, flags),
+           SCARG(uap, rqtp), SCARG(uap, rmtp));
+}
+
+int
+sys_nanosleep(struct proc *p, void *v, register_t *retval)
+{
+       struct sys_nanosleep_args/* {
+               syscallarg(const struct timespec *) rqtp;
+               syscallarg(struct timespec *) rmtp;
+       } */ *uap = v;
+       return clock_nanosleep(p, CLOCK_MONOTONIC, 0, SCARG(uap, rqtp),
+           SCARG(uap, rmtp));
 }
 
 int
Index: sys/kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.180
diff -u -p -r1.180 syscalls.master
--- sys/kern/syscalls.master    12 Dec 2017 01:12:34 -0000      1.180
+++ sys/kern/syscalls.master    26 May 2018 22:42:22 -0000
@@ -240,7 +240,9 @@
                            int flags); }
 113    UNIMPL          fktrace
 114    OBSOL           osendmsg
-115    OBSOL           vtrace
+115    STD             { int sys_clock_nanosleep(clockid_t clock_id, \
+                           int flags, const struct timespec *rqtp, \
+                           struct timespec *rmtp); }
 116    OBSOL           t32_gettimeofday
 117    OBSOL           t32_getrusage
 118    STD             { int sys_getsockopt(int s, int level, int name, \
Index: sys/sys/timetc.h
===================================================================
RCS file: /cvs/src/sys/sys/timetc.h,v
retrieving revision 1.5
diff -u -p -r1.5 timetc.h
--- sys/sys/timetc.h    3 Apr 2014 17:58:31 -0000       1.5
+++ sys/sys/timetc.h    26 May 2018 22:42:22 -0000
@@ -79,6 +79,11 @@ struct timecounter {
 
 extern struct timecounter *timecounter;
 
+/* XXX predeclaration appropriate?  does this even belong here? */
+struct rwlock;
+extern struct rwlock rtc_lock;
+extern int rtc_chan;
+
 u_int64_t tc_getfrequency(void);
 void   tc_init(struct timecounter *tc);
 void   tc_setclock(struct timespec *ts);
Index: usr.bin/kdump/kdump.c
===================================================================
RCS file: /cvs/src/usr.bin/kdump/kdump.c,v
retrieving revision 1.133
diff -u -p -r1.133 kdump.c
--- usr.bin/kdump/kdump.c       28 Nov 2017 15:35:02 -0000      1.133
+++ usr.bin/kdump/kdump.c       26 May 2018 22:42:22 -0000
@@ -552,6 +552,7 @@ static void (*formatters[])(int) = {
        gidname,
        syslogflagname,
        futexflagname,
+       timerflagsname,
 };
 
 enum {
@@ -636,6 +637,7 @@ enum {
        Gidname,
        Syslogflagname,
        Futexflagname,
+       Timerflagsname,
 };
 
 #define Pptr           Phexlong
@@ -741,6 +743,7 @@ static const formatter scargs[][8] = {
     [SYS_clock_getres] = { Clockname, Pptr },
     [SYS_dup2]         = { Pfd, Pfd },
     [SYS_nanosleep]    = { Pptr, Pptr },
+    [SYS_clock_nanosleep] = { Clockname, Timerflagsname, Pptr, Pptr },
     [SYS_fcntl]                = { Pfd, PASS_TWO, Fcntlcmdname },
     [SYS_accept4]      = { Pfd, Pptr, Pptr, Sockflagsname },
     [SYS___thrsleep]   = { Pptr, Clockname, Pptr, Pptr, Pptr },
Index: usr.bin/kdump/kdump_subr.h
===================================================================
RCS file: /cvs/src/usr.bin/kdump/kdump_subr.h,v
retrieving revision 1.21
diff -u -p -r1.21 kdump_subr.h
--- usr.bin/kdump/kdump_subr.h  28 Apr 2017 13:53:05 -0000      1.21
+++ usr.bin/kdump/kdump_subr.h  26 May 2018 22:42:22 -0000
@@ -98,5 +98,6 @@ void evfflagsname(int, int);
 void pollfdeventname(int);
 void syslogflagname(int);
 void futexflagname(int);
+void timerflagsname(int);
 
 extern int decimal, fancy, basecol, arg1;
Index: usr.bin/kdump/mksubr
===================================================================
RCS file: /cvs/src/usr.bin/kdump/mksubr,v
retrieving revision 1.35
diff -u -p -r1.35 mksubr
--- usr.bin/kdump/mksubr        14 Feb 2018 17:26:56 -0000      1.35
+++ usr.bin/kdump/mksubr        26 May 2018 22:42:22 -0000
@@ -314,7 +314,7 @@ auto_or_type "accessmodename" "[A-Z]_OK[
 auto_or_type "mmapprotname" "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h"
 auto_or_type "mmapflagsname" "(__)?MAP_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h"
 auto_orz_type "wait4optname" "W[A-Z]+[[:space:]]+[0-9]+" "sys/wait.h"
-#auto_or_type "timerflagsname" "TIMER_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/time.h"
+auto_or_type "timerflagsname" "TIMER_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/_time.h"
 #auto_or_type "getfsstatflagsname" "MNT_[A-Z]+[[:space:]]+[1-9][0-9]*" "sys/mount.h"
 auto_orz_type "mountflagsname" "MNT_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mount.h"
 auto_or_type "rebootoptname" "RB_[A-Z]+[[:space:]]+0x[0-9]+" "sys/reboot.h"

Reply via email to