Module Name:    src
Committed By:   thorpej
Date:           Sat Dec  5 18:17:01 UTC 2020

Modified Files:
        src/sys/kern: init_main.c kern_clock.c kern_exec.c kern_exit.c
            kern_time.c
        src/sys/sys: param.h proc.h timevar.h

Log Message:
Refactor interval timers to make it possible to support types other than
the BSD/POSIX per-process timers:

- "struct ptimer" is split into "struct itimer" (common interval timer
  data) and "struct ptimer" (per-process timer data, which contains a
  "struct itimer").

- Introduce a new "struct itimer_ops" that supplies information about
  the specific kind of interval timer, including its processing
  queue, the softint handle used to schedule processing, the function
  to call when the timer fires (which adds it to the queue), and an
  optional function to call when the CLOCK_REALTIME clock is changed by
  a call to clock_settime() or settimeofday().

- Rename some functions to clearly identify what they're operating on
  (ptimer vs itimer).

- Use kmem(9) to allocate ptimer-related structures, rather than having
  dedicated pools for them.

Welcome to NetBSD 9.99.77.


To generate a diff of this commit:
cvs rdiff -u -r1.533 -r1.534 src/sys/kern/init_main.c
cvs rdiff -u -r1.142 -r1.143 src/sys/kern/kern_clock.c
cvs rdiff -u -r1.503 -r1.504 src/sys/kern/kern_exec.c
cvs rdiff -u -r1.290 -r1.291 src/sys/kern/kern_exit.c
cvs rdiff -u -r1.206 -r1.207 src/sys/kern/kern_time.c
cvs rdiff -u -r1.680 -r1.681 src/sys/sys/param.h
cvs rdiff -u -r1.367 -r1.368 src/sys/sys/proc.h
cvs rdiff -u -r1.44 -r1.45 src/sys/sys/timevar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/init_main.c
diff -u src/sys/kern/init_main.c:1.533 src/sys/kern/init_main.c:1.534
--- src/sys/kern/init_main.c:1.533	Thu Nov 12 07:44:01 2020
+++ src/sys/kern/init_main.c	Sat Dec  5 18:17:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: init_main.c,v 1.533 2020/11/12 07:44:01 simonb Exp $	*/
+/*	$NetBSD: init_main.c,v 1.534 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009, 2019 The NetBSD Foundation, Inc.
@@ -97,7 +97,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.533 2020/11/12 07:44:01 simonb Exp $");
+__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.534 2020/12/05 18:17:01 thorpej Exp $");
 
 #include "opt_cnmagic.h"
 #include "opt_ddb.h"
@@ -432,9 +432,6 @@ main(void)
 	/* Charge root for one process. */
 	(void)chgproccnt(0, 1);
 
-	/* Initialize timekeeping. */
-	time_init();
-
 	/* Initialize the run queues, turnstiles and sleep queues. */
 	sched_rqinit();
 	turnstile_init();
@@ -452,8 +449,8 @@ main(void)
 	error = mi_cpu_attach(curcpu());
 	KASSERT(error == 0);
 
-	/* Initialize timekeeping, part 2. */
-	time_init2();
+	/* Initialize timekeeping. */
+	time_init();
 
 	/*
 	 * Initialize mbuf's.  Do this now because we might attempt to

Index: src/sys/kern/kern_clock.c
diff -u src/sys/kern/kern_clock.c:1.142 src/sys/kern/kern_clock.c:1.143
--- src/sys/kern/kern_clock.c:1.142	Sun Oct 11 18:39:09 2020
+++ src/sys/kern/kern_clock.c	Sat Dec  5 18:17:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_clock.c,v 1.142 2020/10/11 18:39:09 thorpej Exp $	*/
+/*	$NetBSD: kern_clock.c,v 1.143 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -69,7 +69,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.142 2020/10/11 18:39:09 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.143 2020/12/05 18:17:01 thorpej Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_dtrace.h"
@@ -238,7 +238,7 @@ hardclock(struct clockframe *frame)
 	ci = curcpu();
 	l = ci->ci_onproc;
 
-	timer_tick(l, CLKF_USERMODE(frame));
+	ptimer_tick(l, CLKF_USERMODE(frame));
 
 	/*
 	 * If no separate statistics clock is available, run it from here.

Index: src/sys/kern/kern_exec.c
diff -u src/sys/kern/kern_exec.c:1.503 src/sys/kern/kern_exec.c:1.504
--- src/sys/kern/kern_exec.c:1.503	Wed Nov 25 21:08:59 2020
+++ src/sys/kern/kern_exec.c	Sat Dec  5 18:17:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exec.c,v 1.503 2020/11/25 21:08:59 wiz Exp $	*/
+/*	$NetBSD: kern_exec.c,v 1.504 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.503 2020/11/25 21:08:59 wiz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.504 2020/12/05 18:17:01 thorpej Exp $");
 
 #include "opt_exec.h"
 #include "opt_execfmt.h"
@@ -1209,7 +1209,7 @@ execve_runproc(struct lwp *l, struct exe
 		lwp_ctl_exit();
 
 	/* Remove POSIX timers */
-	timers_free(p, TIMERS_POSIX);
+	ptimers_free(p, TIMERS_POSIX);
 
 	/* Set the PaX flags. */
 	pax_set_flags(epp, p);

Index: src/sys/kern/kern_exit.c
diff -u src/sys/kern/kern_exit.c:1.290 src/sys/kern/kern_exit.c:1.291
--- src/sys/kern/kern_exit.c:1.290	Sat May 23 23:42:43 2020
+++ src/sys/kern/kern_exit.c	Sat Dec  5 18:17:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exit.c,v 1.290 2020/05/23 23:42:43 ad Exp $	*/
+/*	$NetBSD: kern_exit.c,v 1.291 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.290 2020/05/23 23:42:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.291 2020/12/05 18:17:01 thorpej Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_dtrace.h"
@@ -288,7 +288,7 @@ exit1(struct lwp *l, int exitcode, int s
 
 	DPRINTF(("%s: %d.%d exiting.\n", __func__, p->p_pid, l->l_lid));
 
-	timers_free(p, TIMERS_ALL);
+	ptimers_free(p, TIMERS_ALL);
 #if defined(__HAVE_RAS)
 	ras_purgeall();
 #endif

Index: src/sys/kern/kern_time.c
diff -u src/sys/kern/kern_time.c:1.206 src/sys/kern/kern_time.c:1.207
--- src/sys/kern/kern_time.c:1.206	Tue Oct 27 00:07:18 2020
+++ src/sys/kern/kern_time.c	Sat Dec  5 18:17:01 2020
@@ -1,11 +1,12 @@
-/*	$NetBSD: kern_time.c,v 1.206 2020/10/27 00:07:18 nia Exp $	*/
+/*	$NetBSD: kern_time.c,v 1.207 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
- * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009, 2020
+ *     The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Christopher G. Demetriou, and by Andrew Doran.
+ * by Christopher G. Demetriou, by Andrew Doran, and by Jason R. Thorpe.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -61,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.206 2020/10/27 00:07:18 nia Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.207 2020/12/05 18:17:01 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/resourcevar.h>
@@ -78,16 +79,12 @@ __KERNEL_RCSID(0, "$NetBSD: kern_time.c,
 #include <sys/syscallargs.h>
 #include <sys/cpu.h>
 
-static void	timer_intr(void *);
-static void	itimerfire(struct ptimer *);
-static void	itimerfree(struct ptimers *, int);
+static kmutex_t	itimer_mutex __cacheline_aligned;
+static struct itlist itimer_realtime_changed_notify;
 
-kmutex_t	timer_lock;
-
-static void	*timer_sih;
-static TAILQ_HEAD(, ptimer) timer_queue;
-
-struct pool ptimer_pool, ptimers_pool;
+static void	ptimer_intr(void *);
+static void	*ptimer_sih __read_mostly;
+static struct itqueue ptimer_queue;
 
 #define	CLOCK_VIRTUAL_P(clockid)	\
 	((clockid) == CLOCK_VIRTUAL || (clockid) == CLOCK_PROF)
@@ -106,23 +103,76 @@ void
 time_init(void)
 {
 
-	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
-	    &pool_allocator_nointr, IPL_NONE);
-	pool_init(&ptimers_pool, sizeof(struct ptimers), 0, 0, 0, "ptimerspl",
-	    &pool_allocator_nointr, IPL_NONE);
+	mutex_init(&itimer_mutex, MUTEX_DEFAULT, IPL_SCHED);
+	LIST_INIT(&itimer_realtime_changed_notify);
+
+	TAILQ_INIT(&ptimer_queue);
+	ptimer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
+	    ptimer_intr, NULL);
+}
+
+/*
+ * Check if the time will wrap if set to ts.
+ *
+ * ts - timespec describing the new time
+ * delta - the delta between the current time and ts
+ */
+bool
+time_wraps(struct timespec *ts, struct timespec *delta)
+{
+
+	/*
+	 * Don't allow the time to be set forward so far it
+	 * will wrap and become negative, thus allowing an
+	 * attacker to bypass the next check below.  The
+	 * cutoff is 1 year before rollover occurs, so even
+	 * if the attacker uses adjtime(2) to move the time
+	 * past the cutoff, it will take a very long time
+	 * to get to the wrap point.
+	 */
+	if ((ts->tv_sec > LLONG_MAX - 365*24*60*60) ||
+	    (delta->tv_sec < 0 || delta->tv_nsec < 0))
+		return true;
+
+	return false;
+}
+
+/*
+ * itimer_lock:
+ *
+ *	Acquire the interval timer data lock.
+ */
+void
+itimer_lock(void)
+{
+	mutex_spin_enter(&itimer_mutex);
 }
 
+/*
+ * itimer_unlock:
+ *
+ *	Release the interval timer data lock.
+ */
 void
-time_init2(void)
+itimer_unlock(void)
 {
+	mutex_spin_exit(&itimer_mutex);
+}
 
-	TAILQ_INIT(&timer_queue);
-	mutex_init(&timer_lock, MUTEX_DEFAULT, IPL_SCHED);
-	timer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
-	    timer_intr, NULL);
+/*
+ * itimer_lock_held:
+ *
+ *	Check that the interval timer lock is held for diagnostic
+ *	assertions.
+ */
+static inline bool __diagused
+itimer_lock_held(void)
+{
+	return mutex_owned(&itimer_mutex);
 }
 
-/* Time of day and interval timer support.
+/*
+ * Time of day and interval timer support.
  *
  * These routines provide the kernel entry points to get and set
  * the time-of-day and per-process interval timers.  Subroutines
@@ -144,7 +194,6 @@ settime1(struct proc *p, const struct ti
 	if (ts->tv_sec < 0 || ts->tv_sec > (1LL << 36))
 		return (EINVAL);
 
-	/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
 	nanotime(&now);
 	timespecsub(ts, &now, &delta);
 
@@ -164,6 +213,23 @@ settime1(struct proc *p, const struct ti
 
 	resettodr();
 
+	/*
+	 * Notify pending CLOCK_REALTIME timers about the real time change.
+	 * There may be inactive timers on this list, but this happens
+	 * comparatively less often than timers firing, and so it's better
+	 * to put the extra checks here than to complicate the other code
+	 * path.
+	 */
+	struct itimer *it;
+	itimer_lock();
+	LIST_FOREACH(it, &itimer_realtime_changed_notify, it_rtchgq) {
+		KASSERT(it->it_ops->ito_realtime_changed != NULL);
+		if (timespecisset(&it->it_time.it_value)) {
+			(*it->it_ops->ito_realtime_changed)(it);
+		}
+	}
+	itimer_unlock();
+
 	return (0);
 }
 
@@ -558,184 +624,278 @@ adjtime1(const struct timeval *delta, st
 }
 
 /*
- * Interval timer support. Both the BSD getitimer() family and the POSIX
- * timer_*() family of routines are supported.
+ * Interval timer support.
  *
- * All timers are kept in an array pointed to by p_timers, which is
- * allocated on demand - many processes don't use timers at all. The
- * first four elements in this array are reserved for the BSD timers:
- * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, element
- * 2 is ITIMER_PROF, and element 3 is ITIMER_MONOTONIC. The rest may be
- * allocated by the timer_create() syscall.
+ * The itimer_*() routines provide generic support for interval timers,
+ * both real (CLOCK_REALTIME, CLOCK_MONOTONIC), and virtual (CLOCK_VIRTUAL,
+ * CLOCK_PROF).
  *
- * Realtime timers are kept in the ptimer structure as an absolute
- * time; virtual time timers are kept as a linked list of deltas.
- * Virtual time timers are processed in the hardclock() routine of
- * kern_clock.c.  The real time timer is processed by a callout
- * routine, called from the softclock() routine.  Since a callout may
- * be delayed in real time due to interrupt processing in the system,
- * it is possible for the real time timeout routine (realtimeexpire,
- * given below), to be delayed in real time past when it is supposed
- * to occur.  It does not suffice, therefore, to reload the real timer
- * .it_value from the real time timers .it_interval.  Rather, we
- * compute the next time in absolute time the timer should go off.  */
+ * Real timers keep their deadline as an absolute time, and are fired
+ * by a callout.  Virtual timers are kept as a linked-list of deltas,
+ * and are processed by hardclock().
+ *
+ * Because the real time timer callout may be delayed in real time due
+ * to interrupt processing on the system, it is possible for the real
+ * time timeout routine (itimer_callout()) to run after its deadline.
+ * It does not suffice, therefore, to reload the real timer .it_value
+ * from the timer's .it_interval.  Rather, we compute the next deadline
+ * in absolute time based on the current time and the .it_interval value,
+ * and report any overruns.
+ *
+ * Note that while the virtual timers are supported in a generic fashion
+ * here, they only (currently) make sense as per-process timers, and thus
+ * only really work for that case.
+ */
 
-/* Allocate a POSIX realtime timer. */
-int
-sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap,
-    register_t *retval)
+/*
+ * itimer_init:
+ *
+ *	Initialize the common data for an interval timer.
+ */
+static void
+itimer_init(struct itimer * const it, const struct itimer_ops * const ops,
+    clockid_t const id, struct itlist * const itl)
 {
-	/* {
-		syscallarg(clockid_t) clock_id;
-		syscallarg(struct sigevent *) evp;
-		syscallarg(timer_t *) timerid;
-	} */
 
-	return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
-	    SCARG(uap, evp), copyin, l);
+	KASSERT(itimer_lock_held());
+	KASSERT(ops != NULL);
+
+	timespecclear(&it->it_time.it_value);
+	it->it_ops = ops;
+	it->it_clockid = id;
+	it->it_overruns = 0;
+	it->it_queued = false;
+	it->it_dying = false;
+	if (!CLOCK_VIRTUAL_P(id)) {
+		KASSERT(itl == NULL);
+		callout_init(&it->it_ch, CALLOUT_MPSAFE);
+		if (id == CLOCK_REALTIME && ops->ito_realtime_changed != NULL) {
+			LIST_INSERT_HEAD(&itimer_realtime_changed_notify,
+			    it, it_rtchgq);
+		}
+	} else {
+		KASSERT(itl != NULL);
+		it->it_vlist = itl;
+		it->it_active = false;
+	}
 }
 
-int
-timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
-    copyin_t fetch_event, struct lwp *l)
+/*
+ * itimer_fini:
+ *
+ *	Release resources used by an interval timer.
+ *
+ *	N.B. itimer_lock must be held on entry, and is released on exit.
+ */
+static void
+itimer_fini(struct itimer * const it)
 {
-	int error;
-	timer_t timerid;
-	struct ptimers *pts;
-	struct ptimer *pt;
-	struct proc *p;
-
-	p = l->l_proc;
 
-	if ((u_int)id > CLOCK_MONOTONIC)
-		return (EINVAL);
+	KASSERT(itimer_lock_held());
 
-	if ((pts = p->p_timers) == NULL)
-		pts = timers_alloc(p);
+	it->it_dying = true;
 
-	pt = pool_get(&ptimer_pool, PR_WAITOK | PR_ZERO);
-	if (evp != NULL) {
-		if (((error =
-		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
-		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
-			(pt->pt_ev.sigev_notify > SIGEV_SA)) ||
-			(pt->pt_ev.sigev_notify == SIGEV_SIGNAL &&
-			 (pt->pt_ev.sigev_signo <= 0 ||
-			  pt->pt_ev.sigev_signo >= NSIG))) {
-			pool_put(&ptimer_pool, pt);
-			return (error ? error : EINVAL);
+	/*
+	 * For non-virtual timers, stop the callout, or wait for it to
+	 * run if it has already fired.  It cannot restart again after
+	 * this point: the callout won't restart itself when dying, no
+	 * other users holding the lock can restart it, and any other
+	 * users waiting for callout_halt concurrently (itimer_settime)
+	 * will restart from the top.
+	 */
+	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
+		callout_halt(&it->it_ch, &itimer_mutex);
+		if (it->it_clockid == CLOCK_REALTIME &&
+		    it->it_ops->ito_realtime_changed != NULL) {
+			LIST_REMOVE(it, it_rtchgq);
 		}
 	}
 
-	/* Find a free timer slot, skipping those reserved for setitimer(). */
-	mutex_spin_enter(&timer_lock);
-	for (timerid = TIMER_MIN; timerid < TIMER_MAX; timerid++)
-		if (pts->pts_timers[timerid] == NULL)
-			break;
-	if (timerid == TIMER_MAX) {
-		mutex_spin_exit(&timer_lock);
-		pool_put(&ptimer_pool, pt);
-		return EAGAIN;
+	/* Remove it from the queue to be signalled.  */
+	if (it->it_queued) {
+		TAILQ_REMOVE(it->it_ops->ito_queue, it, it_chain);
+		it->it_queued = false;
 	}
-	if (evp == NULL) {
-		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
-		switch (id) {
-		case CLOCK_REALTIME:
-		case CLOCK_MONOTONIC:
-			pt->pt_ev.sigev_signo = SIGALRM;
-			break;
-		case CLOCK_VIRTUAL:
-			pt->pt_ev.sigev_signo = SIGVTALRM;
-			break;
-		case CLOCK_PROF:
-			pt->pt_ev.sigev_signo = SIGPROF;
-			break;
+
+	/* All done with the global state.  */
+	itimer_unlock();
+
+	/* Destroy the callout, if needed. */
+	if (!CLOCK_VIRTUAL_P(it->it_clockid))
+		callout_destroy(&it->it_ch);
+}
+
+/*
+ * itimer_decr:
+ *
+ *	Decrement an interval timer by a specified number of nanoseconds,
+ *	which must be less than a second, i.e. < 1000000000.  If the timer
+ *	expires, then reload it.  In this case, carry over (nsec - old value)
+ *	to reduce the value reloaded into the timer so that the timer does
+ *	not drift.  This routine assumes that it is called in a context where
+ *	the timers on which it is operating cannot change in value.
+ *
+ *	Returns true if the timer has expired.
+ */
+static bool
+itimer_decr(struct itimer *it, int nsec)
+{
+	struct itimerspec *itp;
+	int error __diagused;
+
+	KASSERT(itimer_lock_held());
+	KASSERT(CLOCK_VIRTUAL_P(it->it_clockid));
+
+	itp = &it->it_time;
+	if (itp->it_value.tv_nsec < nsec) {
+		if (itp->it_value.tv_sec == 0) {
+			/* expired, and already in next interval */
+			nsec -= itp->it_value.tv_nsec;
+			goto expire;
 		}
-		pt->pt_ev.sigev_value.sival_int = timerid;
+		itp->it_value.tv_nsec += 1000000000;
+		itp->it_value.tv_sec--;
 	}
-	pt->pt_info.ksi_signo = pt->pt_ev.sigev_signo;
-	pt->pt_info.ksi_errno = 0;
-	pt->pt_info.ksi_code = 0;
-	pt->pt_info.ksi_pid = p->p_pid;
-	pt->pt_info.ksi_uid = kauth_cred_getuid(l->l_cred);
-	pt->pt_info.ksi_value = pt->pt_ev.sigev_value;
-	pt->pt_type = id;
-	pt->pt_proc = p;
-	pt->pt_overruns = 0;
-	pt->pt_poverruns = 0;
-	pt->pt_entry = timerid;
-	pt->pt_queued = false;
-	timespecclear(&pt->pt_time.it_value);
-	if (!CLOCK_VIRTUAL_P(id))
-		callout_init(&pt->pt_ch, CALLOUT_MPSAFE);
-	else
-		pt->pt_active = 0;
+	itp->it_value.tv_nsec -= nsec;
+	nsec = 0;
+	if (timespecisset(&itp->it_value))
+		return false;
+	/* expired, exactly at end of interval */
+ expire:
+	if (timespecisset(&itp->it_interval)) {
+		itp->it_value = itp->it_interval;
+		itp->it_value.tv_nsec -= nsec;
+		if (itp->it_value.tv_nsec < 0) {
+			itp->it_value.tv_nsec += 1000000000;
+			itp->it_value.tv_sec--;
+		}
+		error = itimer_settime(it);
+		KASSERT(error == 0); /* virtual, never fails */
+	} else
+		itp->it_value.tv_nsec = 0;		/* sec is already 0 */
+	return true;
+}
+
+/*
+ * itimer_fire:
+ *
+ *	An interval timer has fired.  Enqueue it for processing, if
+ *	needed.
+ */
+void
+itimer_fire(struct itimer * const it)
+{
 
-	pts->pts_timers[timerid] = pt;
-	mutex_spin_exit(&timer_lock);
+	KASSERT(itimer_lock_held());
 
-	return copyout(&timerid, tid, sizeof(timerid));
+	if (!it->it_queued) {
+		TAILQ_INSERT_TAIL(it->it_ops->ito_queue, it, it_chain);
+		it->it_queued = true;
+		softint_schedule(*it->it_ops->ito_sihp);
+	}
 }
 
-/* Delete a POSIX realtime timer */
-int
-sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap,
-    register_t *retval)
+static void itimer_callout(void *);
+
+/*
+ * itimer_arm_real:
+ *
+ *	Arm a non-virtual timer.
+ */
+static void
+itimer_arm_real(struct itimer * const it)
 {
-	/* {
-		syscallarg(timer_t) timerid;
-	} */
-	struct proc *p = l->l_proc;
-	timer_t timerid;
-	struct ptimers *pts;
-	struct ptimer *pt, *ptn;
+	/*
+	 * Don't need to check tshzto() return value, here.
+	 * callout_reset() does it for us.
+	 */
+	callout_reset(&it->it_ch,
+	    (it->it_clockid == CLOCK_MONOTONIC
+		? tshztoup(&it->it_time.it_value)
+		: tshzto(&it->it_time.it_value)),
+	    itimer_callout, it);
+}
 
-	timerid = SCARG(uap, timerid);
-	pts = p->p_timers;
-	
-	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
-		return (EINVAL);
+/*
+ * itimer_callout:
+ *
+ *	Callout to expire a non-virtual timer.  Queue it up for processing,
+ *	and then reload, if it is configured to do so.
+ *
+ *	N.B. A delay in processing this callout causes multiple
+ *	SIGALRM calls to be compressed into one.
+ */
+static void
+itimer_callout(void *arg)
+{
+	uint64_t last_val, next_val, interval, now_ns;
+	struct timespec now, next;
+	struct itimer * const it = arg;
+	int backwards;
 
-	mutex_spin_enter(&timer_lock);
-	if ((pt = pts->pts_timers[timerid]) == NULL) {
-		mutex_spin_exit(&timer_lock);
-		return (EINVAL);
+	itimer_lock();
+	(*it->it_ops->ito_fire)(it);
+
+	if (!timespecisset(&it->it_time.it_interval)) {
+		timespecclear(&it->it_time.it_value);
+		itimer_unlock();
+		return;
 	}
-	if (CLOCK_VIRTUAL_P(pt->pt_type)) {
-		if (pt->pt_active) {
-			ptn = LIST_NEXT(pt, pt_list);
-			LIST_REMOVE(pt, pt_list);
-			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
-				timespecadd(&pt->pt_time.it_value,
-				    &ptn->pt_time.it_value,
-				    &ptn->pt_time.it_value);
-			pt->pt_active = 0;
-		}
+
+	if (it->it_clockid == CLOCK_MONOTONIC) {
+		getnanouptime(&now);
+	} else {
+		getnanotime(&now);
 	}
+	backwards = (timespeccmp(&it->it_time.it_value, &now, >));
+	timespecadd(&it->it_time.it_value, &it->it_time.it_interval, &next);
+	/* Handle the easy case of non-overflown timers first. */
+	if (!backwards && timespeccmp(&next, &now, >)) {
+		it->it_time.it_value = next;
+	} else {
+		now_ns = timespec2ns(&now);
+		last_val = timespec2ns(&it->it_time.it_value);
+		interval = timespec2ns(&it->it_time.it_interval);
 
-	/* Free the timer and release the lock.  */
-	itimerfree(pts, timerid);
+		next_val = now_ns +
+		    (now_ns - last_val + interval - 1) % interval;
 
-	return (0);
+		if (backwards)
+			next_val += interval;
+		else
+			it->it_overruns += (now_ns - last_val) / interval;
+
+		it->it_time.it_value.tv_sec = next_val / 1000000000;
+		it->it_time.it_value.tv_nsec = next_val % 1000000000;
+	}
+
+	/*
+	 * Reset the callout, if it's not going away.
+	 */
+	if (!it->it_dying)
+		itimer_arm_real(it);
+	itimer_unlock();
 }
 
 /*
- * Set up the given timer. The value in pt->pt_time.it_value is taken
- * to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC timers and
- * a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
+ * itimer_settime:
+ *
+ *	Set up the given interval timer. The value in it->it_time.it_value
+ *	is taken to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC
+ *	timers and a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
  *
- * If the callout had already fired but not yet run, fails with
- * ERESTART -- caller must restart from the top to look up a timer.
+ *	If the callout had already fired but not yet run, fails with
+ *	ERESTART -- caller must restart from the top to look up a timer.
  */
 int
-timer_settime(struct ptimer *pt)
+itimer_settime(struct itimer *it)
 {
-	struct ptimer *ptn, *pptn;
-	struct ptlist *ptl;
+	struct itimer *itn, *pitn;
+	struct itlist *itl;
 
-	KASSERT(mutex_owned(&timer_lock));
+	KASSERT(itimer_lock_held());
 
-	if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
+	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
 		/*
 		 * Try to stop the callout.  However, if it had already
 		 * fired, we have to drop the lock to wait for it, so
@@ -743,73 +903,66 @@ timer_settime(struct ptimer *pt)
 		 * any more.  In that case, tell the caller to start
 		 * over from the top.
 		 */
-		if (callout_halt(&pt->pt_ch, &timer_lock))
+		if (callout_halt(&it->it_ch, &itimer_mutex))
 			return ERESTART;
 
-		/* Now we can touch pt and start it up again.  */
-		if (timespecisset(&pt->pt_time.it_value)) {
-			/*
-			 * Don't need to check tshzto() return value, here.
-			 * callout_reset() does it for us.
-			 */
-			callout_reset(&pt->pt_ch,
-			    pt->pt_type == CLOCK_MONOTONIC ?
-			    tshztoup(&pt->pt_time.it_value) :
-			    tshzto(&pt->pt_time.it_value),
-			    realtimerexpire, pt);
-		}
+		/* Now we can touch it and start it up again. */
+		if (timespecisset(&it->it_time.it_value))
+			itimer_arm_real(it);
 	} else {
-		if (pt->pt_active) {
-			ptn = LIST_NEXT(pt, pt_list);
-			LIST_REMOVE(pt, pt_list);
-			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
-				timespecadd(&pt->pt_time.it_value,
-				    &ptn->pt_time.it_value,
-				    &ptn->pt_time.it_value);
-		}
-		if (timespecisset(&pt->pt_time.it_value)) {
-			if (pt->pt_type == CLOCK_VIRTUAL)
-				ptl = &pt->pt_proc->p_timers->pts_virtual;
-			else
-				ptl = &pt->pt_proc->p_timers->pts_prof;
+		if (it->it_active) {
+			itn = LIST_NEXT(it, it_list);
+			LIST_REMOVE(it, it_list);
+			for ( ; itn; itn = LIST_NEXT(itn, it_list))
+				timespecadd(&it->it_time.it_value,
+				    &itn->it_time.it_value,
+				    &itn->it_time.it_value);
+		}
+		if (timespecisset(&it->it_time.it_value)) {
+			itl = it->it_vlist;
+			for (itn = LIST_FIRST(itl), pitn = NULL;
+			     itn && timespeccmp(&it->it_time.it_value,
+				 &itn->it_time.it_value, >);
+			     pitn = itn, itn = LIST_NEXT(itn, it_list))
+				timespecsub(&it->it_time.it_value,
+				    &itn->it_time.it_value,
+				    &it->it_time.it_value);
 
-			for (ptn = LIST_FIRST(ptl), pptn = NULL;
-			     ptn && timespeccmp(&pt->pt_time.it_value,
-				 &ptn->pt_time.it_value, >);
-			     pptn = ptn, ptn = LIST_NEXT(ptn, pt_list))
-				timespecsub(&pt->pt_time.it_value,
-				    &ptn->pt_time.it_value,
-				    &pt->pt_time.it_value);
-
-			if (pptn)
-				LIST_INSERT_AFTER(pptn, pt, pt_list);
+			if (pitn)
+				LIST_INSERT_AFTER(pitn, it, it_list);
 			else
-				LIST_INSERT_HEAD(ptl, pt, pt_list);
+				LIST_INSERT_HEAD(itl, it, it_list);
+
+			for ( ; itn ; itn = LIST_NEXT(itn, it_list))
+				timespecsub(&itn->it_time.it_value,
+				    &it->it_time.it_value,
+				    &itn->it_time.it_value);
 
-			for ( ; ptn ; ptn = LIST_NEXT(ptn, pt_list))
-				timespecsub(&ptn->pt_time.it_value,
-				    &pt->pt_time.it_value,
-				    &ptn->pt_time.it_value);
-
-			pt->pt_active = 1;
-		} else
-			pt->pt_active = 0;
+			it->it_active = true;
+		} else {
+			it->it_active = false;
+		}
 	}
 
 	/* Success!  */
 	return 0;
 }
 
+/*
+ * itimer_gettime:
+ *
+ *	Return the remaining time of an interval timer.
+ */
 void
-timer_gettime(struct ptimer *pt, struct itimerspec *aits)
+itimer_gettime(const struct itimer *it, struct itimerspec *aits)
 {
 	struct timespec now;
-	struct ptimer *ptn;
+	struct itimer *itn;
 
-	KASSERT(mutex_owned(&timer_lock));
+	KASSERT(itimer_lock_held());
 
-	*aits = pt->pt_time;
-	if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
+	*aits = it->it_time;
+	if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
 		/*
 		 * Convert from absolute to relative time in .it_value
 		 * part of real time timer.  If time for real time
@@ -818,7 +971,7 @@ timer_gettime(struct ptimer *pt, struct 
 		 * off.
 		 */
 		if (timespecisset(&aits->it_value)) {
-			if (pt->pt_type == CLOCK_REALTIME) {
+			if (it->it_clockid == CLOCK_REALTIME) {
 				getnanotime(&now);
 			} else { /* CLOCK_MONOTONIC */
 				getnanouptime(&now);
@@ -829,28 +982,345 @@ timer_gettime(struct ptimer *pt, struct 
 				timespecsub(&aits->it_value, &now,
 				    &aits->it_value);
 		}
-	} else if (pt->pt_active) {
-		if (pt->pt_type == CLOCK_VIRTUAL)
-			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_virtual);
-		else
-			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_prof);
-		for ( ; ptn && ptn != pt; ptn = LIST_NEXT(ptn, pt_list))
+	} else if (it->it_active) {
+		for (itn = LIST_FIRST(it->it_vlist); itn && itn != it;
+		     itn = LIST_NEXT(itn, it_list))
 			timespecadd(&aits->it_value,
-			    &ptn->pt_time.it_value, &aits->it_value);
-		KASSERT(ptn != NULL); /* pt should be findable on the list */
+			    &itn->it_time.it_value, &aits->it_value);
+		KASSERT(itn != NULL); /* it should be findable on the list */
 	} else
 		timespecclear(&aits->it_value);
 }
 
-
-
-/* Set and arm a POSIX realtime timer */
-int
-sys___timer_settime50(struct lwp *l,
-    const struct sys___timer_settime50_args *uap,
-    register_t *retval)
-{
-	/* {
+/*
+ * Per-process timer support.
+ *
+ * Both the BSD getitimer() family and the POSIX timer_*() family of
+ * routines are supported.
+ *
+ * All timers are kept in an array pointed to by p_timers, which is
+ * allocated on demand - many processes don't use timers at all. The
+ * first four elements in this array are reserved for the BSD timers:
+ * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, element
+ * 2 is ITIMER_PROF, and element 3 is ITIMER_MONOTONIC. The rest may be
+ * allocated by the timer_create() syscall.
+ *
+ * These timers are a "sub-class" of interval timer.
+ */
+
+/*
+ * ptimer_free:
+ *
+ *	Free the per-process timer at the specified index.
+ */
+static void
+ptimer_free(struct ptimers *pts, int index)
+{
+	struct itimer *it;
+	struct ptimer *pt;
+
+	KASSERT(itimer_lock_held());
+
+	it = pts->pts_timers[index];
+	pt = container_of(it, struct ptimer, pt_itimer);
+	pts->pts_timers[index] = NULL;
+	itimer_fini(it);	/* releases itimer_lock */
+	kmem_free(pt, sizeof(*pt));
+}
+
+/*
+ * ptimers_alloc:
+ *
+ *	Allocate a ptimers structure for the specified process.
+ */
+static struct ptimers *
+ptimers_alloc(struct proc *p)
+{
+	struct ptimers *pts;
+	int i;
+
+	pts = kmem_alloc(sizeof(*pts), KM_SLEEP);
+	LIST_INIT(&pts->pts_virtual);
+	LIST_INIT(&pts->pts_prof);
+	for (i = 0; i < TIMER_MAX; i++)
+		pts->pts_timers[i] = NULL;
+	itimer_lock();
+	if (p->p_timers == NULL) {
+		p->p_timers = pts;
+		itimer_unlock();
+		return pts;
+	}
+	itimer_unlock();
+	kmem_free(pts, sizeof(*pts));
+	return p->p_timers;
+}
+
+/*
+ * ptimers_free:
+ *
+ *	Clean up the per-process timers. If "which" is set to TIMERS_ALL,
+ *	then clean up all timers and free all the data structures. If
+ *	"which" is set to TIMERS_POSIX, only clean up the timers allocated
+ *	by timer_create(), not the BSD setitimer() timers, and only free the
+ *	structure if none of those remain.
+ *
+ *	This function is exported because it is needed in the exec and
+ *	exit code paths.
+ */
+void
+ptimers_free(struct proc *p, int which)
+{
+	struct ptimers *pts;
+	struct itimer *itn;
+	struct timespec ts;
+	int i;
+
+	if (p->p_timers == NULL)
+		return;
+
+	pts = p->p_timers;
+	itimer_lock();
+	if (which == TIMERS_ALL) {
+		p->p_timers = NULL;
+		i = 0;
+	} else {
+		timespecclear(&ts);
+		for (itn = LIST_FIRST(&pts->pts_virtual);
+		     itn && itn != pts->pts_timers[ITIMER_VIRTUAL];
+		     itn = LIST_NEXT(itn, it_list)) {
+			KASSERT(itn->it_clockid == CLOCK_VIRTUAL);
+			timespecadd(&ts, &itn->it_time.it_value, &ts);
+		}
+		LIST_FIRST(&pts->pts_virtual) = NULL;
+		if (itn) {
+			KASSERT(itn->it_clockid == CLOCK_VIRTUAL);
+			timespecadd(&ts, &itn->it_time.it_value,
+			    &itn->it_time.it_value);
+			LIST_INSERT_HEAD(&pts->pts_virtual, itn, it_list);
+		}
+		timespecclear(&ts);
+		for (itn = LIST_FIRST(&pts->pts_prof);
+		     itn && itn != pts->pts_timers[ITIMER_PROF];
+		     itn = LIST_NEXT(itn, it_list)) {
+			KASSERT(itn->it_clockid == CLOCK_PROF);
+			timespecadd(&ts, &itn->it_time.it_value, &ts);
+		}
+		LIST_FIRST(&pts->pts_prof) = NULL;
+		if (itn) {
+			KASSERT(itn->it_clockid == CLOCK_PROF);
+			timespecadd(&ts, &itn->it_time.it_value,
+			    &itn->it_time.it_value);
+			LIST_INSERT_HEAD(&pts->pts_prof, itn, it_list);
+		}
+		i = TIMER_MIN;
+	}
+	for ( ; i < TIMER_MAX; i++) {
+		if (pts->pts_timers[i] != NULL) {
+			/* Free the timer and release the lock.  */
+			ptimer_free(pts, i);
+			/* Reacquire the lock for the next one.  */
+			itimer_lock();
+		}
+	}
+	if (pts->pts_timers[0] == NULL && pts->pts_timers[1] == NULL &&
+	    pts->pts_timers[2] == NULL && pts->pts_timers[3] == NULL) {
+		p->p_timers = NULL;
+		itimer_unlock();
+		kmem_free(pts, sizeof(*pts));
+	} else
+		itimer_unlock();
+}
+
+/*
+ * ptimer_fire:
+ *
+ *	Fire a per-process timer.
+ */
+static void
+ptimer_fire(struct itimer *it)
+{
+	struct ptimer *pt = container_of(it, struct ptimer, pt_itimer);
+
+	KASSERT(itimer_lock_held());
+
+	/*
+	 * XXX Can overrun, but we don't do signal queueing yet, anyway.
+	 * XXX Relying on the clock interrupt is stupid.
+	 */
+	if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) {
+		return;
+	}
+	itimer_fire(it);
+}
+
+/*
+ * Operations vector for per-process timers (BSD and POSIX).
+ */
+static const struct itimer_ops ptimer_itimer_ops = {
+	.ito_queue = &ptimer_queue,
+	.ito_sihp = &ptimer_sih,
+	.ito_fire = &ptimer_fire,
+};
+
+/*
+ * sys_timer_create:
+ *
+ *	System call to create a POSIX timer.
+ */
+int
+sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(clockid_t) clock_id;
+		syscallarg(struct sigevent *) evp;
+		syscallarg(timer_t *) timerid;
+	} */
+
+	return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
+	    SCARG(uap, evp), copyin, l);
+}
+
+int
+timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
+    copyin_t fetch_event, struct lwp *l)
+{
+	int error;
+	timer_t timerid;
+	struct itlist *itl;
+	struct ptimers *pts;
+	struct ptimer *pt;
+	struct proc *p;
+
+	p = l->l_proc;
+
+	if ((u_int)id > CLOCK_MONOTONIC)
+		return (EINVAL);
+
+	if ((pts = p->p_timers) == NULL)
+		pts = ptimers_alloc(p);
+
+	pt = kmem_zalloc(sizeof(*pt), KM_SLEEP);
+	if (evp != NULL) {
+		if (((error =
+		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
+		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
+			(pt->pt_ev.sigev_notify > SIGEV_SA)) ||
+			(pt->pt_ev.sigev_notify == SIGEV_SIGNAL &&
+			 (pt->pt_ev.sigev_signo <= 0 ||
+			  pt->pt_ev.sigev_signo >= NSIG))) {
+			kmem_free(pt, sizeof(*pt));
+			return (error ? error : EINVAL);
+		}
+	}
+
+	/* Find a free timer slot, skipping those reserved for setitimer(). */
+	itimer_lock();
+	for (timerid = TIMER_MIN; timerid < TIMER_MAX; timerid++)
+		if (pts->pts_timers[timerid] == NULL)
+			break;
+	if (timerid == TIMER_MAX) {
+		itimer_unlock();
+		kmem_free(pt, sizeof(*pt));
+		return EAGAIN;
+	}
+	if (evp == NULL) {
+		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
+		switch (id) {
+		case CLOCK_REALTIME:
+		case CLOCK_MONOTONIC:
+			pt->pt_ev.sigev_signo = SIGALRM;
+			break;
+		case CLOCK_VIRTUAL:
+			pt->pt_ev.sigev_signo = SIGVTALRM;
+			break;
+		case CLOCK_PROF:
+			pt->pt_ev.sigev_signo = SIGPROF;
+			break;
+		}
+		pt->pt_ev.sigev_value.sival_int = timerid;
+	}
+
+	switch (id) {
+	case CLOCK_VIRTUAL:
+		itl = &pt->pt_proc->p_timers->pts_virtual;
+		break;
+	case CLOCK_PROF:
+		itl = &pt->pt_proc->p_timers->pts_prof;
+		break;
+	default:
+		itl = NULL;
+	}
+
+	itimer_init(&pt->pt_itimer, &ptimer_itimer_ops, id, itl);
+	pt->pt_proc = p;
+	pt->pt_poverruns = 0;
+	pt->pt_entry = timerid;
+
+	pts->pts_timers[timerid] = &pt->pt_itimer;
+	itimer_unlock();
+
+	return copyout(&timerid, tid, sizeof(timerid));
+}
+
+/*
+ * sys_timer_delete:
+ *
+ *	System call to delete a POSIX timer.
+ */
+int
+sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap,
+    register_t *retval)
+{
+	/* {
+		syscallarg(timer_t) timerid;
+	} */
+	struct proc *p = l->l_proc;
+	timer_t timerid;
+	struct ptimers *pts;
+	struct itimer *it, *itn;
+
+	timerid = SCARG(uap, timerid);
+	pts = p->p_timers;
+	
+	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
+		return (EINVAL);
+
+	itimer_lock();
+	if ((it = pts->pts_timers[timerid]) == NULL) {
+		itimer_unlock();
+		return (EINVAL);
+	}
+
+	if (CLOCK_VIRTUAL_P(it->it_clockid)) {
+		if (it->it_active) {
+			itn = LIST_NEXT(it, it_list);
+			LIST_REMOVE(it, it_list);
+			for ( ; itn; itn = LIST_NEXT(itn, it_list))
+				timespecadd(&it->it_time.it_value,
+				    &itn->it_time.it_value,
+				    &itn->it_time.it_value);
+			it->it_active = false;
+		}
+	}
+
+	/* Free the timer and release the lock.  */
+	ptimer_free(pts, timerid);
+
+	return (0);
+}
+
+/*
+ * sys___timer_settime50:
+ *
+ *	System call to set/arm a POSIX timer.
+ */
+int
+sys___timer_settime50(struct lwp *l,
+    const struct sys___timer_settime50_args *uap,
+    register_t *retval)
+{
+	/* {
 		syscallarg(timer_t) timerid;
 		syscallarg(int) flags;
 		syscallarg(const struct itimerspec *) value;
@@ -883,7 +1353,7 @@ dotimer_settime(int timerid, struct itim
 	struct timespec now;
 	struct itimerspec val, oval;
 	struct ptimers *pts;
-	struct ptimer *pt;
+	struct itimer *it;
 	int error;
 
 	pts = p->p_timers;
@@ -895,15 +1365,15 @@ dotimer_settime(int timerid, struct itim
 	    (error = itimespecfix(&val.it_interval)) != 0)
 		return error;
 
-	mutex_spin_enter(&timer_lock);
-restart:
-	if ((pt = pts->pts_timers[timerid]) == NULL) {
-		mutex_spin_exit(&timer_lock);
+	itimer_lock();
+ restart:
+	if ((it = pts->pts_timers[timerid]) == NULL) {
+		itimer_unlock();
 		return EINVAL;
 	}
 
-	oval = pt->pt_time;
-	pt->pt_time = val;
+	oval = it->it_time;
+	it->it_time = val;
 
 	/*
 	 * If we've been passed a relative time for a realtime timer,
@@ -912,38 +1382,38 @@ restart:
 	 * to zero, which would cancel the timer, or let it go
 	 * negative, which would confuse the comparison tests.
 	 */
-	if (timespecisset(&pt->pt_time.it_value)) {
-		if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
+	if (timespecisset(&it->it_time.it_value)) {
+		if (!CLOCK_VIRTUAL_P(it->it_clockid)) {
 			if ((flags & TIMER_ABSTIME) == 0) {
-				if (pt->pt_type == CLOCK_REALTIME) {
+				if (it->it_clockid == CLOCK_REALTIME) {
 					getnanotime(&now);
 				} else { /* CLOCK_MONOTONIC */
 					getnanouptime(&now);
 				}
-				timespecadd(&pt->pt_time.it_value, &now,
-				    &pt->pt_time.it_value);
+				timespecadd(&it->it_time.it_value, &now,
+				    &it->it_time.it_value);
 			}
 		} else {
 			if ((flags & TIMER_ABSTIME) != 0) {
 				getnanotime(&now);
-				timespecsub(&pt->pt_time.it_value, &now,
-				    &pt->pt_time.it_value);
-				if (!timespecisset(&pt->pt_time.it_value) ||
-				    pt->pt_time.it_value.tv_sec < 0) {
-					pt->pt_time.it_value.tv_sec = 0;
-					pt->pt_time.it_value.tv_nsec = 1;
+				timespecsub(&it->it_time.it_value, &now,
+				    &it->it_time.it_value);
+				if (!timespecisset(&it->it_time.it_value) ||
+				    it->it_time.it_value.tv_sec < 0) {
+					it->it_time.it_value.tv_sec = 0;
+					it->it_time.it_value.tv_nsec = 1;
 				}
 			}
 		}
 	}
 
-	error = timer_settime(pt);
+	error = itimer_settime(it);
 	if (error == ERESTART) {
-		KASSERT(!CLOCK_VIRTUAL_P(pt->pt_type));
+		KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
 		goto restart;
 	}
 	KASSERT(error == 0);
-	mutex_spin_exit(&timer_lock);
+	itimer_unlock();
 
 	if (ovalue)
 		*ovalue = oval;
@@ -951,7 +1421,11 @@ restart:
 	return (0);
 }
 
-/* Return the time remaining until a POSIX timer fires. */
+/*
+ * sys___timer_gettime50:
+ *
+ *	System call to return the time remaining until a POSIX timer fires.
+ */
 int
 sys___timer_gettime50(struct lwp *l,
     const struct sys___timer_gettime50_args *uap, register_t *retval)
@@ -973,27 +1447,29 @@ sys___timer_gettime50(struct lwp *l,
 int
 dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its)
 {
-	struct ptimer *pt;
+	struct itimer *it;
 	struct ptimers *pts;
 
 	pts = p->p_timers;
 	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
 		return (EINVAL);
-	mutex_spin_enter(&timer_lock);
-	if ((pt = pts->pts_timers[timerid]) == NULL) {
-		mutex_spin_exit(&timer_lock);
+	itimer_lock();
+	if ((it = pts->pts_timers[timerid]) == NULL) {
+		itimer_unlock();
 		return (EINVAL);
 	}
-	timer_gettime(pt, its);
-	mutex_spin_exit(&timer_lock);
+	itimer_gettime(it, its);
+	itimer_unlock();
 
 	return 0;
 }
 
 /*
- * Return the count of the number of times a periodic timer expired
- * while a notification was already pending. The counter is reset when
- * a timer expires and a notification can be posted.
+ * sys_timer_getoverrun:
+ *
+ *	System call to return the number of times a POSIX timer has
+ *	expired while a notification was already pending.  The counter
+ *	is reset when a timer expires and a notification can be posted.
  */
 int
 sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap,
@@ -1005,6 +1481,7 @@ sys_timer_getoverrun(struct lwp *l, cons
 	struct proc *p = l->l_proc;
 	struct ptimers *pts;
 	int timerid;
+	struct itimer *it;
 	struct ptimer *pt;
 
 	timerid = SCARG(uap, timerid);
@@ -1012,90 +1489,25 @@ sys_timer_getoverrun(struct lwp *l, cons
 	pts = p->p_timers;
 	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
 		return (EINVAL);
-	mutex_spin_enter(&timer_lock);
-	if ((pt = pts->pts_timers[timerid]) == NULL) {
-		mutex_spin_exit(&timer_lock);
+	itimer_lock();
+	if ((it = pts->pts_timers[timerid]) == NULL) {
+		itimer_unlock();
 		return (EINVAL);
 	}
+	pt = container_of(it, struct ptimer, pt_itimer);
 	*retval = pt->pt_poverruns;
 	if (*retval >= DELAYTIMER_MAX)
 		*retval = DELAYTIMER_MAX;
-	mutex_spin_exit(&timer_lock);
+	itimer_unlock();
 
 	return (0);
 }
 
 /*
- * Real interval timer expired:
- * send process whose timer expired an alarm signal.
- * If time is not set up to reload, then just return.
- * Else compute next time timer should go off which is > current time.
- * This is where delay in processing this timeout causes multiple
- * SIGALRM calls to be compressed into one.
+ * sys___getitimer50:
+ *
+ *	System call to get the time remaining before a BSD timer fires.
  */
-void
-realtimerexpire(void *arg)
-{
-	uint64_t last_val, next_val, interval, now_ns;
-	struct timespec now, next;
-	struct ptimer *pt;
-	int backwards;
-
-	pt = arg;
-
-	mutex_spin_enter(&timer_lock);
-	itimerfire(pt);
-
-	if (!timespecisset(&pt->pt_time.it_interval)) {
-		timespecclear(&pt->pt_time.it_value);
-		mutex_spin_exit(&timer_lock);
-		return;
-	}
-
-	if (pt->pt_type == CLOCK_MONOTONIC) {
-		getnanouptime(&now);
-	} else {
-		getnanotime(&now);
-	}
-	backwards = (timespeccmp(&pt->pt_time.it_value, &now, >));
-	timespecadd(&pt->pt_time.it_value, &pt->pt_time.it_interval, &next);
-	/* Handle the easy case of non-overflown timers first. */
-	if (!backwards && timespeccmp(&next, &now, >)) {
-		pt->pt_time.it_value = next;
-	} else {
-		now_ns = timespec2ns(&now);
-		last_val = timespec2ns(&pt->pt_time.it_value);
-		interval = timespec2ns(&pt->pt_time.it_interval);
-
-		next_val = now_ns +
-		    (now_ns - last_val + interval - 1) % interval;
-
-		if (backwards)
-			next_val += interval;
-		else
-			pt->pt_overruns += (now_ns - last_val) / interval;
-
-		pt->pt_time.it_value.tv_sec = next_val / 1000000000;
-		pt->pt_time.it_value.tv_nsec = next_val % 1000000000;
-	}
-
-	/*
-	 * Reset the callout, if it's not going away.
-	 *
-	 * Don't need to check tshzto() return value, here.
-	 * callout_reset() does it for us.
-	 */
-	if (!pt->pt_dying)
-		callout_reset(&pt->pt_ch,
-		    (pt->pt_type == CLOCK_MONOTONIC
-			? tshztoup(&pt->pt_time.it_value)
-			: tshzto(&pt->pt_time.it_value)),
-		    realtimerexpire, pt);
-	mutex_spin_exit(&timer_lock);
-}
-
-/* BSD routine to get the value of an interval timer. */
-/* ARGSUSED */
 int
 sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap,
     register_t *retval)
@@ -1119,29 +1531,32 @@ int
 dogetitimer(struct proc *p, int which, struct itimerval *itvp)
 {
 	struct ptimers *pts;
-	struct ptimer *pt;
+	struct itimer *it;
 	struct itimerspec its;
 
 	if ((u_int)which > ITIMER_MONOTONIC)
 		return (EINVAL);
 
-	mutex_spin_enter(&timer_lock);
+	itimer_lock();
 	pts = p->p_timers;
-	if (pts == NULL || (pt = pts->pts_timers[which]) == NULL) {
+	if (pts == NULL || (it = pts->pts_timers[which]) == NULL) {
 		timerclear(&itvp->it_value);
 		timerclear(&itvp->it_interval);
 	} else {
-		timer_gettime(pt, &its);
+		itimer_gettime(it, &its);
 		TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value);
 		TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval);
 	}
-	mutex_spin_exit(&timer_lock);
+	itimer_unlock();
 
 	return 0;
 }
 
-/* BSD routine to set/arm an interval timer. */
-/* ARGSUSED */
+/*
+ * sys___setitimer50:
+ *
+ *	System call to set/arm a BSD timer.
+ */
 int
 sys___setitimer50(struct lwp *l, const struct sys___setitimer50_args *uap,
     register_t *retval)
@@ -1181,7 +1596,9 @@ dosetitimer(struct proc *p, int which, s
 {
 	struct timespec now;
 	struct ptimers *pts;
-	struct ptimer *pt, *spare;
+	struct ptimer *spare;
+	struct itimer *it;
+	struct itlist *itl;
 	int error;
 
 	KASSERT((u_int)which <= CLOCK_MONOTONIC);
@@ -1199,330 +1616,146 @@ dosetitimer(struct proc *p, int which, s
 	    pts->pts_timers[which] == NULL))
 		return (0);
 	if (pts == NULL)
-		pts = timers_alloc(p);
-	mutex_spin_enter(&timer_lock);
-restart:
-	pt = pts->pts_timers[which];
-	if (pt == NULL) {
+		pts = ptimers_alloc(p);
+	itimer_lock();
+ restart:
+	it = pts->pts_timers[which];
+	if (it == NULL) {
+		struct ptimer *pt;
+
 		if (spare == NULL) {
-			mutex_spin_exit(&timer_lock);
-			spare = pool_get(&ptimer_pool, PR_WAITOK | PR_ZERO);
+			itimer_unlock();
+			spare = kmem_zalloc(sizeof(*spare), KM_SLEEP);
 			goto retry;
 		}
 		pt = spare;
 		spare = NULL;
+
+		it = &pt->pt_itimer;
 		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
 		pt->pt_ev.sigev_value.sival_int = which;
-		pt->pt_overruns = 0;
-		pt->pt_proc = p;
-		pt->pt_type = which;
-		pt->pt_entry = which;
-		pt->pt_queued = false;
-		if (!CLOCK_VIRTUAL_P(which))
-			callout_init(&pt->pt_ch, CALLOUT_MPSAFE);
-		else
-			pt->pt_active = 0;
 
 		switch (which) {
 		case ITIMER_REAL:
 		case ITIMER_MONOTONIC:
+			itl = NULL;
 			pt->pt_ev.sigev_signo = SIGALRM;
 			break;
 		case ITIMER_VIRTUAL:
+			itl = &pt->pt_proc->p_timers->pts_virtual;
 			pt->pt_ev.sigev_signo = SIGVTALRM;
 			break;
 		case ITIMER_PROF:
+			itl = &pt->pt_proc->p_timers->pts_prof;
 			pt->pt_ev.sigev_signo = SIGPROF;
 			break;
 		}
-		pts->pts_timers[which] = pt;
+		itimer_init(it, &ptimer_itimer_ops, which, itl);
+		pt->pt_proc = p;
+		pt->pt_entry = which;
+
+		pts->pts_timers[which] = it;
 	}
 
-	TIMEVAL_TO_TIMESPEC(&itvp->it_value, &pt->pt_time.it_value);
-	TIMEVAL_TO_TIMESPEC(&itvp->it_interval, &pt->pt_time.it_interval);
+	TIMEVAL_TO_TIMESPEC(&itvp->it_value, &it->it_time.it_value);
+	TIMEVAL_TO_TIMESPEC(&itvp->it_interval, &it->it_time.it_interval);
 
-	if (timespecisset(&pt->pt_time.it_value)) {
+	if (timespecisset(&it->it_time.it_value)) {
 		/* Convert to absolute time */
 		/* XXX need to wrap in splclock for timecounters case? */
 		switch (which) {
 		case ITIMER_REAL:
 			getnanotime(&now);
-			timespecadd(&pt->pt_time.it_value, &now,
-			    &pt->pt_time.it_value);
+			timespecadd(&it->it_time.it_value, &now,
+			    &it->it_time.it_value);
 			break;
 		case ITIMER_MONOTONIC:
 			getnanouptime(&now);
-			timespecadd(&pt->pt_time.it_value, &now,
-			    &pt->pt_time.it_value);
+			timespecadd(&it->it_time.it_value, &now,
+			    &it->it_time.it_value);
 			break;
 		default:
 			break;
 		}
 	}
-	error = timer_settime(pt);
+	error = itimer_settime(it);
 	if (error == ERESTART) {
-		KASSERT(!CLOCK_VIRTUAL_P(pt->pt_type));
+		KASSERT(!CLOCK_VIRTUAL_P(it->it_clockid));
 		goto restart;
 	}
 	KASSERT(error == 0);
-	mutex_spin_exit(&timer_lock);
+	itimer_unlock();
 	if (spare != NULL)
-		pool_put(&ptimer_pool, spare);
+		kmem_free(spare, sizeof(*spare));
 
 	return (0);
 }
 
-/* Utility routines to manage the array of pointers to timers. */
-struct ptimers *
-timers_alloc(struct proc *p)
-{
-	struct ptimers *pts;
-	int i;
-
-	pts = pool_get(&ptimers_pool, PR_WAITOK);
-	LIST_INIT(&pts->pts_virtual);
-	LIST_INIT(&pts->pts_prof);
-	for (i = 0; i < TIMER_MAX; i++)
-		pts->pts_timers[i] = NULL;
-	mutex_spin_enter(&timer_lock);
-	if (p->p_timers == NULL) {
-		p->p_timers = pts;
-		mutex_spin_exit(&timer_lock);
-		return pts;
-	}
-	mutex_spin_exit(&timer_lock);
-	pool_put(&ptimers_pool, pts);
-	return p->p_timers;
-}
-
 /*
- * Clean up the per-process timers. If "which" is set to TIMERS_ALL,
- * then clean up all timers and free all the data structures. If
- * "which" is set to TIMERS_POSIX, only clean up the timers allocated
- * by timer_create(), not the BSD setitimer() timers, and only free the
- * structure if none of those remain.
- */
-void
-timers_free(struct proc *p, int which)
-{
-	struct ptimers *pts;
-	struct ptimer *ptn;
-	struct timespec ts;
-	int i;
-
-	if (p->p_timers == NULL)
-		return;
-
-	pts = p->p_timers;
-	mutex_spin_enter(&timer_lock);
-	if (which == TIMERS_ALL) {
-		p->p_timers = NULL;
-		i = 0;
-	} else {
-		timespecclear(&ts);
-		for (ptn = LIST_FIRST(&pts->pts_virtual);
-		     ptn && ptn != pts->pts_timers[ITIMER_VIRTUAL];
-		     ptn = LIST_NEXT(ptn, pt_list)) {
-			KASSERT(ptn->pt_type == CLOCK_VIRTUAL);
-			timespecadd(&ts, &ptn->pt_time.it_value, &ts);
-		}
-		LIST_FIRST(&pts->pts_virtual) = NULL;
-		if (ptn) {
-			KASSERT(ptn->pt_type == CLOCK_VIRTUAL);
-			timespecadd(&ts, &ptn->pt_time.it_value,
-			    &ptn->pt_time.it_value);
-			LIST_INSERT_HEAD(&pts->pts_virtual, ptn, pt_list);
-		}
-		timespecclear(&ts);
-		for (ptn = LIST_FIRST(&pts->pts_prof);
-		     ptn && ptn != pts->pts_timers[ITIMER_PROF];
-		     ptn = LIST_NEXT(ptn, pt_list)) {
-			KASSERT(ptn->pt_type == CLOCK_PROF);
-			timespecadd(&ts, &ptn->pt_time.it_value, &ts);
-		}
-		LIST_FIRST(&pts->pts_prof) = NULL;
-		if (ptn) {
-			KASSERT(ptn->pt_type == CLOCK_PROF);
-			timespecadd(&ts, &ptn->pt_time.it_value,
-			    &ptn->pt_time.it_value);
-			LIST_INSERT_HEAD(&pts->pts_prof, ptn, pt_list);
-		}
-		i = TIMER_MIN;
-	}
-	for ( ; i < TIMER_MAX; i++) {
-		if (pts->pts_timers[i] != NULL) {
-			/* Free the timer and release the lock.  */
-			itimerfree(pts, i);
-			/* Reacquire the lock for the next one.  */
-			mutex_spin_enter(&timer_lock);
-		}
-	}
-	if (pts->pts_timers[0] == NULL && pts->pts_timers[1] == NULL &&
-	    pts->pts_timers[2] == NULL && pts->pts_timers[3] == NULL) {
-		p->p_timers = NULL;
-		mutex_spin_exit(&timer_lock);
-		pool_put(&ptimers_pool, pts);
-	} else
-		mutex_spin_exit(&timer_lock);
-}
-
-static void
-itimerfree(struct ptimers *pts, int index)
-{
-	struct ptimer *pt;
-
-	KASSERT(mutex_owned(&timer_lock));
-
-	pt = pts->pts_timers[index];
-
-	/*
-	 * Prevent new references, and notify the callout not to
-	 * restart itself.
-	 */
-	pts->pts_timers[index] = NULL;
-	pt->pt_dying = true;
-
-	/*
-	 * For non-virtual timers, stop the callout, or wait for it to
-	 * run if it has already fired.  It cannot restart again after
-	 * this point: the callout won't restart itself when dying, no
-	 * other users holding the lock can restart it, and any other
-	 * users waiting for callout_halt concurrently (timer_settime)
-	 * will restart from the top.
-	 */
-	if (!CLOCK_VIRTUAL_P(pt->pt_type))
-		callout_halt(&pt->pt_ch, &timer_lock);
-
-	/* Remove it from the queue to be signalled.  */
-	if (pt->pt_queued)
-		TAILQ_REMOVE(&timer_queue, pt, pt_chain);
-
-	/* All done with the global state.  */
-	mutex_spin_exit(&timer_lock);
-
-	/* Destroy the callout, if needed, and free the ptimer.  */
-	if (!CLOCK_VIRTUAL_P(pt->pt_type))
-		callout_destroy(&pt->pt_ch);
-	pool_put(&ptimer_pool, pt);
-}
-
-/*
- * Decrement an interval timer by a specified number
- * of nanoseconds, which must be less than a second,
- * i.e. < 1000000000.  If the timer expires, then reload
- * it.  In this case, carry over (nsec - old value) to
- * reduce the value reloaded into the timer so that
- * the timer does not drift.  This routine assumes
- * that it is called in a context where the timers
- * on which it is operating cannot change in value.
+ * ptimer_tick:
+ *
+ *	Called from hardclock() to decrement per-process virtual timers.
  */
-static int
-itimerdecr(struct ptimer *pt, int nsec)
-{
-	struct itimerspec *itp;
-	int error __diagused;
-
-	KASSERT(mutex_owned(&timer_lock));
-	KASSERT(CLOCK_VIRTUAL_P(pt->pt_type));
-
-	itp = &pt->pt_time;
-	if (itp->it_value.tv_nsec < nsec) {
-		if (itp->it_value.tv_sec == 0) {
-			/* expired, and already in next interval */
-			nsec -= itp->it_value.tv_nsec;
-			goto expire;
-		}
-		itp->it_value.tv_nsec += 1000000000;
-		itp->it_value.tv_sec--;
-	}
-	itp->it_value.tv_nsec -= nsec;
-	nsec = 0;
-	if (timespecisset(&itp->it_value))
-		return (1);
-	/* expired, exactly at end of interval */
-expire:
-	if (timespecisset(&itp->it_interval)) {
-		itp->it_value = itp->it_interval;
-		itp->it_value.tv_nsec -= nsec;
-		if (itp->it_value.tv_nsec < 0) {
-			itp->it_value.tv_nsec += 1000000000;
-			itp->it_value.tv_sec--;
-		}
-		error = timer_settime(pt);
-		KASSERT(error == 0); /* virtual, never fails */
-	} else
-		itp->it_value.tv_nsec = 0;		/* sec is already 0 */
-	return (0);
-}
-
-static void
-itimerfire(struct ptimer *pt)
-{
-
-	KASSERT(mutex_owned(&timer_lock));
-
-	/*
-	 * XXX Can overrun, but we don't do signal queueing yet, anyway.
-	 * XXX Relying on the clock interrupt is stupid.
-	 */
-	if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL || pt->pt_queued) {
-		return;
-	}
-	TAILQ_INSERT_TAIL(&timer_queue, pt, pt_chain);
-	pt->pt_queued = true;
-	softint_schedule(timer_sih);
-}
-
 void
-timer_tick(lwp_t *l, bool user)
+ptimer_tick(lwp_t *l, bool user)
 {
 	struct ptimers *pts;
-	struct ptimer *pt;
+	struct itimer *it;
 	proc_t *p;
 
 	p = l->l_proc;
 	if (p->p_timers == NULL)
 		return;
 
-	mutex_spin_enter(&timer_lock);
+	itimer_lock();
 	if ((pts = l->l_proc->p_timers) != NULL) {
 		/*
 		 * Run current process's virtual and profile time, as needed.
 		 */
-		if (user && (pt = LIST_FIRST(&pts->pts_virtual)) != NULL)
-			if (itimerdecr(pt, tick * 1000) == 0)
-				itimerfire(pt);
-		if ((pt = LIST_FIRST(&pts->pts_prof)) != NULL)
-			if (itimerdecr(pt, tick * 1000) == 0)
-				itimerfire(pt);
+		if (user && (it = LIST_FIRST(&pts->pts_virtual)) != NULL)
+			if (itimer_decr(it, tick * 1000))
+				(*it->it_ops->ito_fire)(it);
+		if ((it = LIST_FIRST(&pts->pts_prof)) != NULL)
+			if (itimer_decr(it, tick * 1000))
+				(*it->it_ops->ito_fire)(it);
 	}
-	mutex_spin_exit(&timer_lock);
+	itimer_unlock();
 }
 
+/*
+ * ptimer_intr:
+ *
+ *	Software interrupt handler for processing per-process
+ *	timer expiration.
+ */
 static void
-timer_intr(void *cookie)
+ptimer_intr(void *cookie)
 {
 	ksiginfo_t ksi;
+	struct itimer *it;
 	struct ptimer *pt;
 	proc_t *p;
 	
 	mutex_enter(&proc_lock);
-	mutex_spin_enter(&timer_lock);
-	while ((pt = TAILQ_FIRST(&timer_queue)) != NULL) {
-		TAILQ_REMOVE(&timer_queue, pt, pt_chain);
-		KASSERT(pt->pt_queued);
-		pt->pt_queued = false;
+	itimer_lock();
+	while ((it = TAILQ_FIRST(&ptimer_queue)) != NULL) {
+		TAILQ_REMOVE(&ptimer_queue, it, it_chain);
+		KASSERT(it->it_ops->ito_queue == &ptimer_queue);
+		KASSERT(it->it_queued);
+		it->it_queued = false;
+
+		pt = container_of(it, struct ptimer, pt_itimer);
 
-		if (pt->pt_proc->p_timers == NULL) {
+		p = pt->pt_proc;
+		if (p->p_timers == NULL) {
 			/* Process is dying. */
 			continue;
 		}
-		p = pt->pt_proc;
 		if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL) {
 			continue;
 		}
 		if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo)) {
-			pt->pt_overruns++;
+			it->it_overruns++;
 			continue;
 		}
 
@@ -1530,38 +1763,12 @@ timer_intr(void *cookie)
 		ksi.ksi_signo = pt->pt_ev.sigev_signo;
 		ksi.ksi_code = SI_TIMER;
 		ksi.ksi_value = pt->pt_ev.sigev_value;
-		pt->pt_poverruns = pt->pt_overruns;
-		pt->pt_overruns = 0;
-		mutex_spin_exit(&timer_lock);
+		pt->pt_poverruns = it->it_overruns;
+		it->it_overruns = 0;
+		itimer_unlock();
 		kpsignal(p, &ksi, NULL);
-		mutex_spin_enter(&timer_lock);
+		itimer_lock();
 	}
-	mutex_spin_exit(&timer_lock);
+	itimer_unlock();
 	mutex_exit(&proc_lock);
 }
-
-/*
- * Check if the time will wrap if set to ts.
- *
- * ts - timespec describing the new time
- * delta - the delta between the current time and ts
- */
-bool
-time_wraps(struct timespec *ts, struct timespec *delta)
-{
-
-	/*
-	 * Don't allow the time to be set forward so far it
-	 * will wrap and become negative, thus allowing an
-	 * attacker to bypass the next check below.  The
-	 * cutoff is 1 year before rollover occurs, so even
-	 * if the attacker uses adjtime(2) to move the time
-	 * past the cutoff, it will take a very long time
-	 * to get to the wrap point.
-	 */
-	if ((ts->tv_sec > LLONG_MAX - 365*24*60*60) ||
-	    (delta->tv_sec < 0 || delta->tv_nsec < 0))
-		return true;
-
-	return false;
-}

Index: src/sys/sys/param.h
diff -u src/sys/sys/param.h:1.680 src/sys/sys/param.h:1.681
--- src/sys/sys/param.h:1.680	Tue Nov 24 16:17:04 2020
+++ src/sys/sys/param.h	Sat Dec  5 18:17:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.680 2020/11/24 16:17:04 christos Exp $	*/
+/*	$NetBSD: param.h,v 1.681 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -67,7 +67,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	999007600	/* NetBSD 9.99.76 */
+#define	__NetBSD_Version__	999007700	/* NetBSD 9.99.77 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)

Index: src/sys/sys/proc.h
diff -u src/sys/sys/proc.h:1.367 src/sys/sys/proc.h:1.368
--- src/sys/sys/proc.h:1.367	Sat May 23 23:42:44 2020
+++ src/sys/sys/proc.h	Sat Dec  5 18:17:01 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.367 2020/05/23 23:42:44 ad Exp $	*/
+/*	$NetBSD: proc.h,v 1.368 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -493,8 +493,6 @@ extern struct proc	*initproc;	/* Process
 
 extern const struct proclist_desc proclists[];
 
-extern struct pool	ptimer_pool;	/* Memory pool for ptimers */
-
 int		proc_find_locked(struct lwp *, struct proc **, pid_t);
 proc_t *	proc_find_raw(pid_t);
 proc_t *	proc_find(pid_t);		/* Find process by ID */

Index: src/sys/sys/timevar.h
diff -u src/sys/sys/timevar.h:1.44 src/sys/sys/timevar.h:1.45
--- src/sys/sys/timevar.h:1.44	Mon May 11 03:59:33 2020
+++ src/sys/sys/timevar.h	Sat Dec  5 18:17:01 2020
@@ -1,7 +1,7 @@
-/*	$NetBSD: timevar.h,v 1.44 2020/05/11 03:59:33 riastradh Exp $	*/
+/*	$NetBSD: timevar.h,v 1.45 2020/12/05 18:17:01 thorpej Exp $	*/
 
 /*
- *  Copyright (c) 2005, 2008, The NetBSD Foundation.
+ *  Copyright (c) 2005, 2008, 2020 The NetBSD Foundation, Inc.
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
@@ -65,33 +65,79 @@
 #include <sys/signal.h>
 #include <sys/systm.h>
 
+struct itimer;
+TAILQ_HEAD(itqueue, itimer);
+LIST_HEAD(itlist, itimer);
+
 /*
- * Structure used to manage timers in a process.
+ * Interval timer operations vector.
+ *
+ * Required fields:
+ *
+ *	- ito_queue: The queue onto which an itimer is added when it
+ *	  fires.
+ *
+ *	- ito_sihp: A pointer to a software interrupt handle that is
+ *	  scheduled to run when an itimer is added to ito_queue.
+ *
+ *	- ito_fire: A function to be called when the itimer fires.
+ *	  The timer implementation should perform whatever processing
+ *	  is necessary for that timer type and then call itimer_fire().
+ *
+ * Optional fields:
+ *
+ *	- ito_realtime_changed: A function that is called when the system
+ *	  time (CLOCK_REALTIME) is changed.
+ */
+struct itimer_ops {
+	struct itqueue *ito_queue;
+	void	**ito_sihp;
+	void	(*ito_fire)(struct itimer *);
+	void	(*ito_realtime_changed)(struct itimer *);
+};
+
+/*
+ * Common interval timer data.
  */
-struct 	ptimer {
+struct itimer {
 	union {
-		callout_t	pt_ch;
 		struct {
-			LIST_ENTRY(ptimer)	pt_list;
-			int	pt_active;
-		} pt_nonreal;
-	} pt_data;
-	struct	sigevent pt_ev;
-	struct	itimerspec pt_time;
-	struct	ksiginfo pt_info;
-	int	pt_overruns;	/* Overruns currently accumulating */
-	int	pt_poverruns;	/* Overruns associated w/ a delivery */
-	int	pt_type;
-	int	pt_entry;
-	int	pt_queued;
-	bool	pt_dying;
-	struct proc *pt_proc;
-	TAILQ_ENTRY(ptimer) pt_chain;
+			callout_t		it_ch;
+			LIST_ENTRY(itimer)	it_rtchgq;
+		} it_real;
+		struct {
+			struct itlist		*it_vlist;
+			LIST_ENTRY(itimer)	it_list;
+			bool			it_active;
+		} it_virtual;
+	};
+	const struct itimer_ops *it_ops;
+	TAILQ_ENTRY(itimer) it_chain;
+	struct itimerspec it_time;
+	clockid_t it_clockid;
+	int	it_overruns;	/* Overruns currently accumulating */
+	bool	it_queued;
+	bool	it_dying;
 };
 
-#define pt_ch	pt_data.pt_ch
-#define pt_list	pt_data.pt_nonreal.pt_list
-#define pt_active	pt_data.pt_nonreal.pt_active
+#define	it_ch		it_real.it_ch
+#define	it_rtchgq	it_real.it_rtchgq
+
+#define	it_vlist	it_virtual.it_vlist
+#define	it_list		it_virtual.it_list
+#define	it_active	it_virtual.it_active
+
+/*
+ * Structure used to manage timers in a process.
+ */
+struct ptimer {
+	struct itimer pt_itimer;/* common interval timer data */
+
+	struct	sigevent pt_ev;	/* event notification info */
+	int	pt_poverruns;	/* Overruns associated w/ a delivery */
+	int	pt_entry;	/* slot in proc's timer table */
+	struct proc *pt_proc;	/* associated process */
+};
 
 #define	TIMER_MIN	4	/* [0..3] are reserved for setitimer(2) */
 				/* REAL=0,VIRTUAL=1,PROF=2,MONOTONIC=3 */
@@ -99,12 +145,10 @@ struct 	ptimer {
 #define	TIMERS_ALL	0
 #define	TIMERS_POSIX	1
 
-LIST_HEAD(ptlist, ptimer);
-
-struct	ptimers {
-	struct ptlist pts_virtual;
-	struct ptlist pts_prof;
-	struct ptimer *pts_timers[TIMER_MAX];
+struct ptimers {
+	struct itlist pts_virtual;
+	struct itlist pts_prof;
+	struct itimer *pts_timers[TIMER_MAX];
 };
 
 /*
@@ -170,7 +214,6 @@ int	itimerfix(struct timeval *);
 int	itimespecfix(struct timespec *);
 int	ppsratecheck(struct timeval *, int *, int);
 int	ratecheck(struct timeval *, const struct timeval *);
-void	realtimerexpire(void *);
 int	settime(struct proc *p, struct timespec *);
 int	nanosleep1(struct lwp *, clockid_t, int, struct timespec *,
 	    struct timespec *);
@@ -178,20 +221,23 @@ int	settimeofday1(const struct timeval *
 	    const void *, struct lwp *, bool);
 int	timer_create1(timer_t *, clockid_t, struct sigevent *, copyin_t,
 	    struct lwp *);
-void	timer_gettime(struct ptimer *, struct itimerspec *);
-int	timer_settime(struct ptimer *);
-struct	ptimers *timers_alloc(struct proc *);
-void	timers_free(struct proc *, int);
-void	timer_tick(struct lwp *, bool);
 int	tstohz(const struct timespec *);
 int	tvtohz(const struct timeval *);
 int	inittimeleft(struct timespec *, struct timespec *);
 int	gettimeleft(struct timespec *, struct timespec *);
 void	timerupcall(struct lwp *);
 void	time_init(void);
-void	time_init2(void);
 bool	time_wraps(struct timespec *, struct timespec *);
 
+void	itimer_lock(void);
+void	itimer_unlock(void);
+int	itimer_settime(struct itimer *);
+void	itimer_gettime(const struct itimer *, struct itimerspec *);
+void	itimer_fire(struct itimer *);
+
+void	ptimer_tick(struct lwp *, bool);
+void	ptimers_free(struct proc *, int);
+
 extern volatile time_t time_second;	/* current second in the epoch */
 extern volatile time_t time_uptime;	/* system uptime in seconds */
 

Reply via email to