On Mon, Feb 01, 2021 at 04:25:47PM +0100, Martin Pieuchot wrote:
> On 08/12/20(Tue) 10:06, Martin Pieuchot wrote:
> > Diff below aims to simplify the API to put a thread on a sleep queue and
> > reduce it to the following:
> >
> > sleep_setup();
> > /* check condition or release lock */
> > sleep_finish();
> >
> > It is motivated by my work to sleep the SCHED_LOCK() but might as well
> > prevent/fix some bugs.
> >
> > The tricky part of the current implementation is that sleep_setup_signal()
> > can already park/stop the current thread resulting in a context change.
> > Should any custom accounting / lock check happen before that? At least
> > two lock primitives do so currently: drm's schedule_timeout() and
> > rwlock's rw_enter().
> >
> > As a result of this diff various states can be removed and sleep_finish()
> > contains the following magic:
> >
> > 1. check for signal/parking
> > 2. context switch or remove from sleep queue
> > 3. check for signal/parking
> >
> > Note that sleep_finish() could be simplified even further but I left
> > that for later to ease the review.
>
> Updated diff on top of recent changes from claudio@, still ok?
Found the bug. The timeout for rwsleep() got lost. See below.
> Index: dev/dt/dt_dev.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 dt_dev.c
> --- dev/dt/dt_dev.c 28 Sep 2020 13:16:58 -0000 1.10
> +++ dev/dt/dt_dev.c 26 Jan 2021 17:20:11 -0000
> @@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f
> return (EMSGSIZE);
>
> while (!sc->ds_evtcnt) {
> - sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
> - sleep_setup_signal(&sls);
> - sleep_finish(&sls, !sc->ds_evtcnt);
> - error = sleep_finish_signal(&sls);
> + sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
> + error = sleep_finish(&sls, !sc->ds_evtcnt);
> if (error == EINTR || error == ERESTART)
> break;
> }
> Index: dev/pci/if_myx.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 if_myx.c
> --- dev/pci/if_myx.c 17 Jan 2021 02:52:21 -0000 1.114
> +++ dev/pci/if_myx.c 26 Jan 2021 17:20:11 -0000
> @@ -1397,7 +1397,7 @@ myx_down(struct myx_softc *sc)
> (void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL);
>
> while (sc->sc_state != MYX_S_OFF) {
> - sleep_setup(&sls, sts, PWAIT, "myxdown");
> + sleep_setup(&sls, sts, PWAIT, "myxdown", 0);
> membar_consumer();
> sleep_finish(&sls, sc->sc_state != MYX_S_OFF);
> }
> Index: dev/pci/drm/drm_linux.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.76
> diff -u -p -r1.76 drm_linux.c
> --- dev/pci/drm/drm_linux.c 13 Jan 2021 01:04:49 -0000 1.76
> +++ dev/pci/drm/drm_linux.c 26 Jan 2021 17:22:50 -0000
> @@ -110,14 +110,14 @@ schedule_timeout(long timeout)
> {
> struct sleep_state sls;
> unsigned long deadline;
> - int wait, spl;
> + int wait, spl, timo = 0;
>
> MUTEX_ASSERT_LOCKED(&sch_mtx);
> KASSERT(!cold);
>
> - sleep_setup(&sls, sch_ident, sch_priority, "schto");
> if (timeout != MAX_SCHEDULE_TIMEOUT)
> - sleep_setup_timeout(&sls, timeout);
> + timo = timeout;
> + sleep_setup(&sls, sch_ident, sch_priority, "schto", timo);
>
> wait = (sch_proc == curproc && timeout > 0);
>
> @@ -125,11 +125,9 @@ schedule_timeout(long timeout)
> MUTEX_OLDIPL(&sch_mtx) = splsched();
> mtx_leave(&sch_mtx);
>
> - sleep_setup_signal(&sls);
> -
> if (timeout != MAX_SCHEDULE_TIMEOUT)
> deadline = jiffies + timeout;
> - sleep_finish_all(&sls, wait);
> + sleep_finish(&sls, wait);
> if (timeout != MAX_SCHEDULE_TIMEOUT)
> timeout = deadline - jiffies;
>
> Index: kern/kern_rwlock.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 kern_rwlock.c
> --- kern/kern_rwlock.c 11 Jan 2021 18:49:38 -0000 1.46
> +++ kern/kern_rwlock.c 26 Jan 2021 17:20:11 -0000
> @@ -279,15 +279,13 @@ retry:
> prio = op->wait_prio;
> if (flags & RW_INTR)
> prio |= PCATCH;
> - sleep_setup(&sls, rwl, prio, rwl->rwl_name);
> - if (flags & RW_INTR)
> - sleep_setup_signal(&sls);
> + sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);
>
> do_sleep = !rw_cas(&rwl->rwl_owner, o, set);
>
> - sleep_finish(&sls, do_sleep);
> + error = sleep_finish(&sls, do_sleep);
> if ((flags & RW_INTR) &&
> - (error = sleep_finish_signal(&sls)) != 0)
> + (error != 0))
> return (error);
> if (flags & RW_SLEEPFAIL)
> return (EAGAIN);
> Index: kern/kern_sched.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 kern_sched.c
> --- kern/kern_sched.c 9 Jan 2021 20:57:46 -0000 1.68
> +++ kern/kern_sched.c 26 Jan 2021 17:20:11 -0000
> @@ -674,7 +674,7 @@ sched_stop_secondary_cpus(void)
> if (CPU_IS_PRIMARY(ci))
> continue;
> while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
> - sleep_setup(&sls, spc, PZERO, "schedstate");
> + sleep_setup(&sls, spc, PZERO, "schedstate", 0);
> sleep_finish(&sls,
> (spc->spc_schedflags & SPCF_HALTED) == 0);
> }
> Index: kern/kern_sig.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.270
> diff -u -p -r1.270 kern_sig.c
> --- kern/kern_sig.c 25 Dec 2020 12:59:52 -0000 1.270
> +++ kern/kern_sig.c 1 Feb 2021 15:23:41 -0000
> @@ -2106,7 +2106,7 @@ single_thread_wait(struct process *pr, i
> /* wait until they're all suspended */
> wait = pr->ps_singlecount > 0;
> while (wait) {
> - sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend");
> + sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend", 0);
> wait = pr->ps_singlecount > 0;
> sleep_finish(&sls, wait);
> if (!recheck)
> Index: kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.174
> diff -u -p -r1.174 kern_synch.c
> --- kern/kern_synch.c 11 Jan 2021 13:55:53 -0000 1.174
> +++ kern/kern_synch.c 26 Jan 2021 18:19:52 -0000
> @@ -66,7 +66,7 @@
> #include <sys/ktrace.h>
> #endif
>
> -int sleep_signal_check(struct proc *);
> +int sleep_signal_check(void);
> int thrsleep(struct proc *, struct sys___thrsleep_args *);
> int thrsleep_unlock(void *);
>
> @@ -155,11 +155,8 @@ tsleep(const volatile void *ident, int p
> return (0);
> }
>
> - sleep_setup(&sls, ident, priority, wmesg);
> - sleep_setup_timeout(&sls, timo);
> - sleep_setup_signal(&sls);
> -
> - return sleep_finish_all(&sls, 1);
> + sleep_setup(&sls, ident, priority, wmesg, timo);
> + return sleep_finish(&sls, 1);
> }
>
> int
> @@ -250,8 +247,7 @@ msleep(const volatile void *ident, struc
> return (0);
> }
>
> - sleep_setup(&sls, ident, priority, wmesg);
> - sleep_setup_timeout(&sls, timo);
> + sleep_setup(&sls, ident, priority, wmesg, timo);
>
> /* XXX - We need to make sure that the mutex doesn't
> * unblock splsched. This can be made a bit more
> @@ -261,9 +257,7 @@ msleep(const volatile void *ident, struc
> MUTEX_OLDIPL(mtx) = splsched();
> mtx_leave(mtx);
> /* signal may stop the process, release mutex before that */
> - sleep_setup_signal(&sls);
> -
> - error = sleep_finish_all(&sls, 1);
> + error = sleep_finish(&sls, 1);
>
> if ((priority & PNORELOCK) == 0) {
> mtx_enter(mtx);
> @@ -313,14 +307,11 @@ rwsleep(const volatile void *ident, stru
> rw_assert_anylock(rwl);
> status = rw_status(rwl);
>
> - sleep_setup(&sls, ident, priority, wmesg);
> - sleep_setup_timeout(&sls, timo);
> + sleep_setup(&sls, ident, priority, wmesg, 0);
This needs to be
sleep_setup(&sls, ident, priority, wmesg, timo);
>
> rw_exit(rwl);
> /* signal may stop the process, release rwlock before that */
> - sleep_setup_signal(&sls);
> -
> - error = sleep_finish_all(&sls, 1);
> + error = sleep_finish(&sls, 1);
>
> if ((priority & PNORELOCK) == 0)
> rw_enter(rwl, status);
> @@ -353,7 +344,7 @@ rwsleep_nsec(const volatile void *ident,
>
> void
> sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
> - const char *wmesg)
> + const char *wmesg, int timo)
> {
> struct proc *p = curproc;
>
> @@ -367,14 +358,12 @@ sleep_setup(struct sleep_state *sls, con
> #endif
>
> sls->sls_catch = prio & PCATCH;
> - sls->sls_do_sleep = 1;
> sls->sls_locked = 0;
> - sls->sls_sigerr = 0;
> sls->sls_timeout = 0;
>
> /*
> * The kernel has to be locked for signal processing.
> - * This is done here and not in sleep_setup_signal() because
> + * This is done here and not in sleep_finish() because
> * KERNEL_LOCK() has to be taken before SCHED_LOCK().
> */
> if (sls->sls_catch != 0) {
> @@ -391,35 +380,53 @@ sleep_setup(struct sleep_state *sls, con
> p->p_slptime = 0;
> p->p_slppri = prio & PRIMASK;
> TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
> -}
> -
> -int
> -sleep_finish_all(struct sleep_state *sls, int do_sleep)
> -{
> - int error, error1;
> -
> - sleep_finish(sls, do_sleep);
> - error1 = sleep_finish_timeout(sls);
> - error = sleep_finish_signal(sls);
>
> - /* Signal errors are higher priority than timeouts. */
> - if (error == 0 && error1 != 0)
> - error = error1;
> -
> - return error;
> + KASSERT((p->p_flag & P_TIMEOUT) == 0);
> + if (timo) {
> + sls->sls_timeout = 1;
> + timeout_add(&p->p_sleep_to, timo);
> + }
> }
>
> -void
> +int
> sleep_finish(struct sleep_state *sls, int do_sleep)
> {
> struct proc *p = curproc;
> + int error = 0, error1 = 0;
> +
> + if (sls->sls_catch != 0) {
> + /* sleep_setup() has locked the kernel. */
> + KERNEL_ASSERT_LOCKED();
> +
> + /*
> + * We put ourselves on the sleep queue and start our
> + * timeout before calling sleep_signal_check(), as we could
> + * stop there, and a wakeup or a SIGCONT (or both) could
> + * occur while we were stopped. A SIGCONT would cause
> + * us to be marked as SSLEEP without resuming us, thus
> + * we must be ready for sleep when sleep_signal_check() is
> + * called.
> + * If the wakeup happens while we're stopped, p->p_wchan
> + * will be NULL upon return from sleep_signal_check(). In
> + * that case we need to unwind immediately.
> + */
> + atomic_setbits_int(&p->p_flag, P_SINTR);
> + if ((error = sleep_signal_check()) != 0) {
> + p->p_stat = SONPROC;
> + sls->sls_catch = 0;
> + do_sleep = 0;
> + } else if (p->p_wchan == NULL) {
> + sls->sls_catch = 0;
> + do_sleep = 0;
> + }
> + }
>
> - if (sls->sls_do_sleep && do_sleep) {
> + if (do_sleep) {
> p->p_stat = SSLEEP;
> p->p_ru.ru_nvcsw++;
> SCHED_ASSERT_LOCKED();
> mi_switch();
> - } else if (!do_sleep) {
> + } else {
> unsleep(p);
> }
>
> @@ -436,30 +443,11 @@ sleep_finish(struct sleep_state *sls, in
> * we need to clear it before the ktrace.
> */
> atomic_clearbits_int(&p->p_flag, P_SINTR);
> -}
> -
> -void
> -sleep_setup_timeout(struct sleep_state *sls, int timo)
> -{
> - struct proc *p = curproc;
> -
> - KASSERT((p->p_flag & P_TIMEOUT) == 0);
> -
> - if (timo) {
> - sls->sls_timeout = 1;
> - timeout_add(&p->p_sleep_to, timo);
> - }
> -}
> -
> -int
> -sleep_finish_timeout(struct sleep_state *sls)
> -{
> - struct proc *p = curproc;
>
> if (sls->sls_timeout) {
> if (p->p_flag & P_TIMEOUT) {
> atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
> - return (EWOULDBLOCK);
> + error1 = EWOULDBLOCK;
> } else {
> /* This must not sleep. */
> timeout_del_barrier(&p->p_sleep_to);
> @@ -467,78 +455,18 @@ sleep_finish_timeout(struct sleep_state
> }
> }
>
> - return (0);
> -}
> -
> -int
> -sleep_signal_check(struct proc *p)
> -{
> - int err, sig;
> -
> - if ((err = single_thread_check(p, 1)) != 0)
> - return err;
> - if ((sig = CURSIG(p)) != 0) {
> - if (p->p_p->ps_sigacts->ps_sigintr & sigmask(sig))
> - return EINTR;
> - else
> - return ERESTART;
> - }
> - return 0;
> -}
> -
> -void
> -sleep_setup_signal(struct sleep_state *sls)
> -{
> - struct proc *p = curproc;
> -
> - if (sls->sls_catch == 0)
> - return;
> -
> - /* sleep_setup() has locked the kernel. */
> - KERNEL_ASSERT_LOCKED();
> -
> - /*
> - * We put ourselves on the sleep queue and start our timeout before
> - * calling single_thread_check or CURSIG, as we could stop there, and
> - * a wakeup or a SIGCONT (or both) could occur while we were stopped.
> - * A SIGCONT would cause us to be marked as SSLEEP without resuming us,
> - * thus we must be ready for sleep when CURSIG is called. If the
> - * wakeup happens while we're stopped, p->p_wchan will be 0 upon
> - * return from single_thread_check or CURSIG. In that case we should
> - * not go to sleep. If single_thread_check returns an error we need
> - * to unwind immediately. That's achieved by saving the return value
> - * in sls->sl_unwind and checking it later in sleep_finish_signal.
> - */
> - atomic_setbits_int(&p->p_flag, P_SINTR);
> - if ((sls->sls_sigerr = sleep_signal_check(p)) != 0) {
> - unsleep(p);
> - p->p_stat = SONPROC;
> - sls->sls_do_sleep = 0;
> - } else if (p->p_wchan == 0) {
> - sls->sls_catch = 0;
> - sls->sls_do_sleep = 0;
> - }
> -}
> -
> -int
> -sleep_finish_signal(struct sleep_state *sls)
> -{
> - struct proc *p = curproc;
> - int error = 0;
> -
> - if (sls->sls_catch != 0) {
> - KERNEL_ASSERT_LOCKED();
> -
> - if (sls->sls_sigerr != 0)
> - error = sls->sls_sigerr;
> - else
> - error = sleep_signal_check(p);
> - }
> + /* Check if thread was woken up because of a unwind or signal */
> + if (sls->sls_catch != 0)
> + error = sleep_signal_check();
>
> if (sls->sls_locked)
> KERNEL_UNLOCK();
>
> - return (error);
> + /* Signal errors are higher priority than timeouts. */
> + if (error == 0 && error1 != 0)
> + error = error1;
> +
> + return error;
> }
>
> int
> @@ -560,6 +488,27 @@ wakeup_proc(struct proc *p, const volati
> return awakened;
> }
>
> +
> +/*
> + * Check and handle signals and suspensions around a sleep cycle.
> + */
> +int
> +sleep_signal_check(void)
> +{
> + struct proc *p = curproc;
> + int err, sig;
> +
> + if ((err = single_thread_check(p, 1)) != 0)
> + return err;
> + if ((sig = CURSIG(p)) != 0) {
> + if (p->p_p->ps_sigacts->ps_sigintr & sigmask(sig))
> + return EINTR;
> + else
> + return ERESTART;
> + }
> + return 0;
> +}
> +
I would prefer if this function is above wakeup_proc() so that all sleep_*
functions are together.
> /*
> * Implement timeout for tsleep.
> * If process hasn't been awakened (wchan non-zero),
> @@ -895,7 +844,7 @@ refcnt_finalize(struct refcnt *r, const
>
> refcnt = atomic_dec_int_nv(&r->refs);
> while (refcnt) {
> - sleep_setup(&sls, r, PWAIT, wmesg);
> + sleep_setup(&sls, r, PWAIT, wmesg, 0);
> refcnt = r->refs;
> sleep_finish(&sls, refcnt);
> }
> @@ -923,7 +872,7 @@ cond_wait(struct cond *c, const char *wm
>
> wait = c->c_wait;
> while (wait) {
> - sleep_setup(&sls, c, PWAIT, wmesg);
> + sleep_setup(&sls, c, PWAIT, wmesg, 0);
> wait = c->c_wait;
> sleep_finish(&sls, wait);
> }
> Index: kern/kern_timeout.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_timeout.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 kern_timeout.c
> --- kern/kern_timeout.c 20 Oct 2020 22:37:12 -0000 1.82
> +++ kern/kern_timeout.c 26 Jan 2021 17:20:11 -0000
> @@ -787,7 +787,7 @@ softclock_thread(void *arg)
>
> s = splsoftclock();
> for (;;) {
> - sleep_setup(&sls, &timeout_proc, PSWP, "bored");
> + sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
> sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));
>
> mtx_enter(&timeout_mutex);
> Index: kern/subr_log.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_log.c,v
> retrieving revision 1.71
> diff -u -p -r1.71 subr_log.c
> --- kern/subr_log.c 8 Jan 2021 11:23:57 -0000 1.71
> +++ kern/subr_log.c 26 Jan 2021 17:20:11 -0000
> @@ -246,10 +246,8 @@ logread(dev_t dev, struct uio *uio, int
> * Set up and enter sleep manually instead of using msleep()
> * to keep log_mtx as a leaf lock.
> */
> - sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog");
> - sleep_setup_signal(&sls);
> - sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
> - error = sleep_finish_signal(&sls);
> + sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog", 0);
> + error = sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
> mtx_enter(&log_mtx);
> if (error)
> goto out;
> Index: sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.306
> diff -u -p -r1.306 proc.h
> --- sys/proc.h 18 Jan 2021 18:47:05 -0000 1.306
> +++ sys/proc.h 26 Jan 2021 17:21:09 -0000
> @@ -625,9 +625,7 @@ int proc_cansugid(struct proc *);
> struct sleep_state {
> int sls_s;
> int sls_catch;
> - int sls_do_sleep;
> int sls_locked;
> - int sls_sigerr;
> int sls_timeout;
> };
>
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.150
> diff -u -p -r1.150 systm.h
> --- sys/systm.h 27 Dec 2020 11:38:35 -0000 1.150
> +++ sys/systm.h 26 Jan 2021 17:20:11 -0000
> @@ -257,13 +257,8 @@ void stop_periodic_resettodr(void);
>
> struct sleep_state;
> void sleep_setup(struct sleep_state *, const volatile void *, int,
> - const char *);
> -void sleep_setup_timeout(struct sleep_state *, int);
> -void sleep_setup_signal(struct sleep_state *);
> -void sleep_finish(struct sleep_state *, int);
> -int sleep_finish_timeout(struct sleep_state *);
> -int sleep_finish_signal(struct sleep_state *);
> -int sleep_finish_all(struct sleep_state *, int);
> + const char *, int);
> +int sleep_finish(struct sleep_state *, int);
> void sleep_queue_init(void);
>
> struct cond;
>
With this fix it works for me now.
--
:wq Claudio