The diff below aims to simplify the API used to put a thread on a sleep
queue, reducing it to the following:
sleep_setup();
/* check condition or release lock */
sleep_finish();
It is motivated by my work to sleep the SCHED_LOCK(), but it may also
prevent or fix some bugs.
The tricky part of the current implementation is that sleep_setup_signal()
can already park/stop the current thread resulting in a context change.
Should any custom accounting / lock check happen before that? At least
two lock primitives do so currently: drm's schedule_timeout() and
rwlock's rw_enter().
As a result of this diff, various states can be removed, and sleep_finish()
now performs the following steps:
1. check for signal/parking
2. context switch or remove from sleep queue
3. check for signal/parking
Note that sleep_finish() could be simplified even further but I left
that for later to ease the review.
Comments? Oks?
Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.10
diff -u -p -r1.10 dt_dev.c
--- dev/dt/dt_dev.c 28 Sep 2020 13:16:58 -0000 1.10
+++ dev/dt/dt_dev.c 7 Dec 2020 17:19:15 -0000
@@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f
return (EMSGSIZE);
while (!sc->ds_evtcnt) {
- sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
- sleep_setup_signal(&sls);
- sleep_finish(&sls, !sc->ds_evtcnt);
- error = sleep_finish_signal(&sls);
+ sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
+ error = sleep_finish(&sls, !sc->ds_evtcnt);
if (error == EINTR || error == ERESTART)
break;
}
Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.70
diff -u -p -r1.70 drm_linux.c
--- dev/pci/drm/drm_linux.c 14 Nov 2020 23:08:47 -0000 1.70
+++ dev/pci/drm/drm_linux.c 7 Dec 2020 17:19:15 -0000
@@ -110,26 +110,23 @@ schedule_timeout(long timeout)
{
struct sleep_state sls;
long deadline;
- int wait, spl;
+ int wait, spl, timo = 0;
MUTEX_ASSERT_LOCKED(&sch_mtx);
KASSERT(!cold);
- sleep_setup(&sls, sch_ident, sch_priority, "schto");
if (timeout != MAX_SCHEDULE_TIMEOUT)
- sleep_setup_timeout(&sls, timeout);
+ timo = timeout;
+ sleep_setup(&sls, sch_ident, sch_priority, "schto", timo);
wait = (sch_proc == curproc && timeout > 0);
spl = MUTEX_OLDIPL(&sch_mtx);
MUTEX_OLDIPL(&sch_mtx) = splsched();
mtx_leave(&sch_mtx);
-
- sleep_setup_signal(&sls);
-
if (timeout != MAX_SCHEDULE_TIMEOUT)
deadline = ticks + timeout;
- sleep_finish_all(&sls, wait);
+ sleep_finish(&sls, wait);
if (timeout != MAX_SCHEDULE_TIMEOUT)
timeout = deadline - ticks;
Index: dev/pci/if_myx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
retrieving revision 1.112
diff -u -p -r1.112 if_myx.c
--- dev/pci/if_myx.c 27 Nov 2020 00:13:15 -0000 1.112
+++ dev/pci/if_myx.c 7 Dec 2020 17:19:15 -0000
@@ -1396,7 +1396,7 @@ myx_down(struct myx_softc *sc)
(void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL);
while (sc->sc_state != MYX_S_OFF) {
- sleep_setup(&sls, sts, PWAIT, "myxdown");
+ sleep_setup(&sls, sts, PWAIT, "myxdown", 0);
membar_consumer();
sleep_finish(&sls, sc->sc_state != MYX_S_OFF);
}
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.45
diff -u -p -r1.45 kern_rwlock.c
--- kern/kern_rwlock.c 2 Mar 2020 17:07:49 -0000 1.45
+++ kern/kern_rwlock.c 7 Dec 2020 17:19:15 -0000
@@ -278,15 +278,13 @@ retry:
prio = op->wait_prio;
if (flags & RW_INTR)
prio |= PCATCH;
- sleep_setup(&sls, rwl, prio, rwl->rwl_name);
- if (flags & RW_INTR)
- sleep_setup_signal(&sls);
+ sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);
do_sleep = !rw_cas(&rwl->rwl_owner, o, set);
- sleep_finish(&sls, do_sleep);
+ error = sleep_finish(&sls, do_sleep);
if ((flags & RW_INTR) &&
- (error = sleep_finish_signal(&sls)) != 0)
+ (error != 0))
return (error);
if (flags & RW_SLEEPFAIL)
return (EAGAIN);
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.67
diff -u -p -r1.67 kern_sched.c
--- kern/kern_sched.c 11 Jun 2020 00:00:01 -0000 1.67
+++ kern/kern_sched.c 7 Dec 2020 17:19:15 -0000
@@ -674,7 +674,7 @@ sched_stop_secondary_cpus(void)
if (CPU_IS_PRIMARY(ci))
continue;
while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
- sleep_setup(&sls, spc, PZERO, "schedstate");
+ sleep_setup(&sls, spc, PZERO, "schedstate", 0);
sleep_finish(&sls,
(spc->spc_schedflags & SPCF_HALTED) == 0);
}
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.267
diff -u -p -r1.267 kern_sig.c
--- kern/kern_sig.c 4 Dec 2020 15:16:45 -0000 1.267
+++ kern/kern_sig.c 7 Dec 2020 17:19:15 -0000
@@ -1467,6 +1467,31 @@ postsig(struct proc *p, int signum)
}
/*
+ * Check and handle signals and suspensions around a sleep cycle.
+ */
+int
+sigsleep(void)
+{
+ struct proc *p = curproc;
+ struct sigacts *ps = p->p_p->ps_sigacts;
+ int sig, error = 0;
+
+ error = single_thread_check(p, 1);
+ if (error)
+ return error;
+
+ sig = CURSIG(p);
+ if (sig != 0) {
+ if (ps->ps_sigintr & sigmask(sig))
+ error = EINTR;
+ else
+ error = ERESTART;
+ }
+
+ return error;
+}
+
+/*
* Force the current process to exit with the specified signal, dumping core
* if appropriate. We bypass the normal tests for masked and caught signals,
* allowing unrecoverable failures to terminate the process without changing
@@ -2106,7 +2131,7 @@ single_thread_wait(struct process *pr, i
/* wait until they're all suspended */
wait = pr->ps_singlecount > 0;
while (wait) {
- sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend");
+ sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend", 0);
wait = pr->ps_singlecount > 0;
sleep_finish(&sls, wait);
if (!recheck)
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.171
diff -u -p -r1.171 kern_synch.c
--- kern/kern_synch.c 23 Oct 2020 20:28:09 -0000 1.171
+++ kern/kern_synch.c 7 Dec 2020 17:32:32 -0000
@@ -147,11 +147,8 @@ tsleep(const volatile void *ident, int p
return (0);
}
- sleep_setup(&sls, ident, priority, wmesg);
- sleep_setup_timeout(&sls, timo);
- sleep_setup_signal(&sls);
-
- return sleep_finish_all(&sls, 1);
+ sleep_setup(&sls, ident, priority, wmesg, timo);
+ return sleep_finish(&sls, 1);
}
int
@@ -241,8 +238,7 @@ msleep(const volatile void *ident, struc
return (0);
}
- sleep_setup(&sls, ident, priority, wmesg);
- sleep_setup_timeout(&sls, timo);
+ sleep_setup(&sls, ident, priority, wmesg, timo);
/* XXX - We need to make sure that the mutex doesn't
* unblock splsched. This can be made a bit more
@@ -252,9 +248,7 @@ msleep(const volatile void *ident, struc
MUTEX_OLDIPL(mtx) = splsched();
mtx_leave(mtx);
/* signal may stop the process, release mutex before that */
- sleep_setup_signal(&sls);
-
- error = sleep_finish_all(&sls, 1);
+ error = sleep_finish(&sls, 1);
if ((priority & PNORELOCK) == 0) {
mtx_enter(mtx);
@@ -303,14 +297,11 @@ rwsleep(const volatile void *ident, stru
rw_assert_anylock(rwl);
status = rw_status(rwl);
- sleep_setup(&sls, ident, priority, wmesg);
- sleep_setup_timeout(&sls, timo);
+ sleep_setup(&sls, ident, priority, wmesg, 0);
rw_exit(rwl);
/* signal may stop the process, release rwlock before that */
- sleep_setup_signal(&sls);
-
- error = sleep_finish_all(&sls, 1);
+ error = sleep_finish(&sls, 1);
if ((priority & PNORELOCK) == 0)
rw_enter(rwl, status);
@@ -343,7 +334,7 @@ rwsleep_nsec(const volatile void *ident,
void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
- const char *wmesg)
+ const char *wmesg, int timo)
{
struct proc *p = curproc;
@@ -357,15 +348,12 @@ sleep_setup(struct sleep_state *sls, con
#endif
sls->sls_catch = prio & PCATCH;
- sls->sls_do_sleep = 1;
sls->sls_locked = 0;
- sls->sls_sig = 0;
- sls->sls_unwind = 0;
sls->sls_timeout = 0;
/*
* The kernel has to be locked for signal processing.
- * This is done here and not in sleep_setup_signal() because
+ * This is done here and not in sleep_finish() because
* KERNEL_LOCK() has to be taken before SCHED_LOCK().
*/
if (sls->sls_catch != 0) {
@@ -382,35 +370,52 @@ sleep_setup(struct sleep_state *sls, con
p->p_slptime = 0;
p->p_slppri = prio & PRIMASK;
TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
-}
-
-int
-sleep_finish_all(struct sleep_state *sls, int do_sleep)
-{
- int error, error1;
-
- sleep_finish(sls, do_sleep);
- error1 = sleep_finish_timeout(sls);
- error = sleep_finish_signal(sls);
-
- /* Signal errors are higher priority than timeouts. */
- if (error == 0 && error1 != 0)
- error = error1;
- return error;
+ KASSERT((p->p_flag & P_TIMEOUT) == 0);
+ if (timo) {
+ sls->sls_timeout = 1;
+ timeout_add(&p->p_sleep_to, timo);
+ }
}
-void
+int
sleep_finish(struct sleep_state *sls, int do_sleep)
{
struct proc *p = curproc;
+ int error = 0, error1 = 0;
+
+ if (sls->sls_catch != 0) {
+ /* sleep_setup() has locked the kernel. */
+ KERNEL_ASSERT_LOCKED();
+
+ /*
+ * We put ourselves on the sleep queue and start our
+ * timeout before calling sigsleep(), as we could stop
+ * there, and a wakeup or a SIGCONT (or both) could
+ * occur while we were stopped. A SIGCONT would cause
+ * us to be marked as SSLEEP without resuming us, thus
+ * we must be ready for sleep when sigsleep() is called.
+ * If the wakeup happens while we're stopped, p->p_wchan
+ * will be NULL upon return from sigsleep(). In that case
+ * we need to unwind immediately.
+ */
+ atomic_setbits_int(&p->p_flag, P_SINTR);
+ if ((error = sigsleep()) != 0) {
+ p->p_stat = SONPROC;
+ sls->sls_catch = 0;
+ do_sleep = 0;
+ } else if (p->p_wchan == NULL) {
+ sls->sls_catch = 0;
+ do_sleep = 0;
+ }
+ }
- if (sls->sls_do_sleep && do_sleep) {
+ if (do_sleep) {
p->p_stat = SSLEEP;
p->p_ru.ru_nvcsw++;
SCHED_ASSERT_LOCKED();
mi_switch();
- } else if (!do_sleep) {
+ } else {
unsleep(p);
}
@@ -427,30 +432,11 @@ sleep_finish(struct sleep_state *sls, in
* we need to clear it before the ktrace.
*/
atomic_clearbits_int(&p->p_flag, P_SINTR);
-}
-
-void
-sleep_setup_timeout(struct sleep_state *sls, int timo)
-{
- struct proc *p = curproc;
-
- KASSERT((p->p_flag & P_TIMEOUT) == 0);
-
- if (timo) {
- sls->sls_timeout = 1;
- timeout_add(&p->p_sleep_to, timo);
- }
-}
-
-int
-sleep_finish_timeout(struct sleep_state *sls)
-{
- struct proc *p = curproc;
if (sls->sls_timeout) {
if (p->p_flag & P_TIMEOUT) {
atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
- return (EWOULDBLOCK);
+ error1 = EWOULDBLOCK;
} else {
/* This must not sleep. */
timeout_del_barrier(&p->p_sleep_to);
@@ -458,70 +444,18 @@ sleep_finish_timeout(struct sleep_state
}
}
- return (0);
-}
-
-void
-sleep_setup_signal(struct sleep_state *sls)
-{
- struct proc *p = curproc;
-
- if (sls->sls_catch == 0)
- return;
-
- /* sleep_setup() has locked the kernel. */
- KERNEL_ASSERT_LOCKED();
-
- /*
- * We put ourselves on the sleep queue and start our timeout before
- * calling single_thread_check or CURSIG, as we could stop there, and
- * a wakeup or a SIGCONT (or both) could occur while we were stopped.
- * A SIGCONT would cause us to be marked as SSLEEP without resuming us,
- * thus we must be ready for sleep when CURSIG is called. If the
- * wakeup happens while we're stopped, p->p_wchan will be 0 upon
- * return from single_thread_check or CURSIG. In that case we should
- * not go to sleep. If single_thread_check returns an error we need
- * to unwind immediately. That's achieved by saving the return value
- * in sls->sl_unwind and checking it later in sleep_finish_signal.
- */
- atomic_setbits_int(&p->p_flag, P_SINTR);
- if ((sls->sls_unwind = single_thread_check(p, 1)) != 0 ||
- (sls->sls_sig = CURSIG(p)) != 0) {
- unsleep(p);
- p->p_stat = SONPROC;
- sls->sls_do_sleep = 0;
- } else if (p->p_wchan == 0) {
- sls->sls_catch = 0;
- sls->sls_do_sleep = 0;
- }
-}
-
-int
-sleep_finish_signal(struct sleep_state *sls)
-{
- struct proc *p = curproc;
- int error = 0;
-
- if (sls->sls_catch != 0) {
- KERNEL_ASSERT_LOCKED();
-
- if (sls->sls_unwind != 0 ||
- (sls->sls_unwind = single_thread_check(p, 1)) != 0)
- error = sls->sls_unwind;
- else if (sls->sls_sig != 0 ||
- (sls->sls_sig = CURSIG(p)) != 0) {
- if (p->p_p->ps_sigacts->ps_sigintr &
- sigmask(sls->sls_sig))
- error = EINTR;
- else
- error = ERESTART;
- }
- }
+ /* Check if the thread was woken up because of an unwind or signal */
+ if (sls->sls_catch != 0)
+ error = sigsleep();
if (sls->sls_locked)
KERNEL_UNLOCK();
- return (error);
+ /* Signal errors are higher priority than timeouts. */
+ if (error == 0 && error1 != 0)
+ error = error1;
+
+ return error;
}
int
@@ -878,7 +812,7 @@ refcnt_finalize(struct refcnt *r, const
refcnt = atomic_dec_int_nv(&r->refs);
while (refcnt) {
- sleep_setup(&sls, r, PWAIT, wmesg);
+ sleep_setup(&sls, r, PWAIT, wmesg, 0);
refcnt = r->refs;
sleep_finish(&sls, refcnt);
}
@@ -906,7 +840,7 @@ cond_wait(struct cond *c, const char *wm
wait = c->c_wait;
while (wait) {
- sleep_setup(&sls, c, PWAIT, wmesg);
+ sleep_setup(&sls, c, PWAIT, wmesg, 0);
wait = c->c_wait;
sleep_finish(&sls, wait);
}
Index: kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.82
diff -u -p -r1.82 kern_timeout.c
--- kern/kern_timeout.c 20 Oct 2020 22:37:12 -0000 1.82
+++ kern/kern_timeout.c 7 Dec 2020 17:19:15 -0000
@@ -787,7 +787,7 @@ softclock_thread(void *arg)
s = splsoftclock();
for (;;) {
- sleep_setup(&sls, &timeout_proc, PSWP, "bored");
+ sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));
mtx_enter(&timeout_mutex);
Index: kern/subr_log.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.69
diff -u -p -r1.69 subr_log.c
--- kern/subr_log.c 25 Oct 2020 10:55:42 -0000 1.69
+++ kern/subr_log.c 7 Dec 2020 17:19:15 -0000
@@ -235,10 +235,8 @@ logread(dev_t dev, struct uio *uio, int
* Set up and enter sleep manually instead of using msleep()
* to keep log_mtx as a leaf lock.
*/
- sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog");
- sleep_setup_signal(&sls);
- sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
- error = sleep_finish_signal(&sls);
+ sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog", 0);
+ error = sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
mtx_enter(&log_mtx);
if (error)
goto out;
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.301
diff -u -p -r1.301 proc.h
--- sys/proc.h 10 Nov 2020 17:26:54 -0000 1.301
+++ sys/proc.h 7 Dec 2020 17:19:15 -0000
@@ -612,10 +615,7 @@ int proc_cansugid(struct proc *);
struct sleep_state {
int sls_s;
int sls_catch;
- int sls_do_sleep;
int sls_locked;
- int sls_sig;
- int sls_unwind;
int sls_timeout;
};
Index: sys/signalvar.h
===================================================================
RCS file: /cvs/src/sys/sys/signalvar.h,v
retrieving revision 1.45
diff -u -p -r1.45 signalvar.h
--- sys/signalvar.h 8 Nov 2020 20:37:24 -0000 1.45
+++ sys/signalvar.h 7 Dec 2020 17:19:15 -0000
@@ -128,6 +128,7 @@ void trapsignal(struct proc *p, int sig,
void sigexit(struct proc *, int);
void sigabort(struct proc *);
int sigismasked(struct proc *, int);
+int sigsleep(void);
int sigonstack(size_t);
int killpg1(struct proc *, int, int, int);
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.148
diff -u -p -r1.148 systm.h
--- sys/systm.h 26 Aug 2020 03:29:07 -0000 1.148
+++ sys/systm.h 7 Dec 2020 17:19:15 -0000
@@ -255,13 +255,8 @@ void stop_periodic_resettodr(void);
struct sleep_state;
void sleep_setup(struct sleep_state *, const volatile void *, int,
- const char *);
-void sleep_setup_timeout(struct sleep_state *, int);
-void sleep_setup_signal(struct sleep_state *);
-void sleep_finish(struct sleep_state *, int);
-int sleep_finish_timeout(struct sleep_state *);
-int sleep_finish_signal(struct sleep_state *);
-int sleep_finish_all(struct sleep_state *, int);
+ const char *, int);
+int sleep_finish(struct sleep_state *, int);
void sleep_queue_init(void);
struct cond;