On Mon, Feb 01, 2021 at 04:25:47PM +0100, Martin Pieuchot wrote: > On 08/12/20(Tue) 10:06, Martin Pieuchot wrote: > > Diff below aims to simplify the API to put a thread on a sleep queue and > > reduce it to the following: > > > > sleep_setup(); > > /* check condition or release lock */ > > sleep_finish(); > > > > It is motivated by my work to sleep the SCHED_LOCK() but might as well > > prevent/fix some bugs. > > > > The tricky part of the current implementation is that sleep_setup_signal() > > can already park/stop the current thread resulting in a context change. > > Should any custom accounting / lock check happen before that? At least > > two lock primitives do so currently: drm's schedule_timeout() and > > rwlock's rw_enter(). > > > > As a result of this diff various states can be removed and sleep_finish() > > contains the following magic: > > > > 1. check for signal/parking > > 2. context switch or remove from sleep queue > > 3. check for signal/parking > > > > Note that sleep_finish() could be simplified even further but I left > > that for later to ease the review. > > Updated diff on top of recent changes from claudio@, still ok?
Found the bug. The timeout for rwsleep() got lost. See below. > Index: dev/dt/dt_dev.c > =================================================================== > RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v > retrieving revision 1.10 > diff -u -p -r1.10 dt_dev.c > --- dev/dt/dt_dev.c 28 Sep 2020 13:16:58 -0000 1.10 > +++ dev/dt/dt_dev.c 26 Jan 2021 17:20:11 -0000 > @@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f > return (EMSGSIZE); > > while (!sc->ds_evtcnt) { > - sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread"); > - sleep_setup_signal(&sls); > - sleep_finish(&sls, !sc->ds_evtcnt); > - error = sleep_finish_signal(&sls); > + sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0); > + error = sleep_finish(&sls, !sc->ds_evtcnt); > if (error == EINTR || error == ERESTART) > break; > } > Index: dev/pci/if_myx.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_myx.c,v > retrieving revision 1.114 > diff -u -p -r1.114 if_myx.c > --- dev/pci/if_myx.c 17 Jan 2021 02:52:21 -0000 1.114 > +++ dev/pci/if_myx.c 26 Jan 2021 17:20:11 -0000 > @@ -1397,7 +1397,7 @@ myx_down(struct myx_softc *sc) > (void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL); > > while (sc->sc_state != MYX_S_OFF) { > - sleep_setup(&sls, sts, PWAIT, "myxdown"); > + sleep_setup(&sls, sts, PWAIT, "myxdown", 0); > membar_consumer(); > sleep_finish(&sls, sc->sc_state != MYX_S_OFF); > } > Index: dev/pci/drm/drm_linux.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v > retrieving revision 1.76 > diff -u -p -r1.76 drm_linux.c > --- dev/pci/drm/drm_linux.c 13 Jan 2021 01:04:49 -0000 1.76 > +++ dev/pci/drm/drm_linux.c 26 Jan 2021 17:22:50 -0000 > @@ -110,14 +110,14 @@ schedule_timeout(long timeout) > { > struct sleep_state sls; > unsigned long deadline; > - int wait, spl; > + int wait, spl, timo = 0; > > MUTEX_ASSERT_LOCKED(&sch_mtx); > KASSERT(!cold); > > - sleep_setup(&sls, sch_ident, sch_priority, "schto"); > if (timeout != MAX_SCHEDULE_TIMEOUT) > - sleep_setup_timeout(&sls, timeout); > + timo = timeout; > + sleep_setup(&sls, sch_ident, sch_priority, "schto", timo); > > wait = (sch_proc == curproc && timeout > 0); > > @@ -125,11 +125,9 @@ schedule_timeout(long timeout) > MUTEX_OLDIPL(&sch_mtx) = splsched(); > mtx_leave(&sch_mtx); > > - sleep_setup_signal(&sls); > - > if (timeout != MAX_SCHEDULE_TIMEOUT) > deadline = jiffies + timeout; > - sleep_finish_all(&sls, wait); > + sleep_finish(&sls, wait); > if (timeout != MAX_SCHEDULE_TIMEOUT) > timeout = deadline - jiffies; > > Index: kern/kern_rwlock.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > retrieving revision 1.46 > diff -u -p -r1.46 kern_rwlock.c > --- kern/kern_rwlock.c 11 Jan 2021 18:49:38 -0000 1.46 > +++ kern/kern_rwlock.c 26 Jan 2021 17:20:11 -0000 > @@ -279,15 +279,13 @@ retry: > prio = op->wait_prio; > if (flags & RW_INTR) > prio |= PCATCH; > - sleep_setup(&sls, rwl, prio, rwl->rwl_name); > - if (flags & RW_INTR) > - sleep_setup_signal(&sls); > + sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0); > > do_sleep = !rw_cas(&rwl->rwl_owner, o, set); > > - sleep_finish(&sls, do_sleep); > + error = sleep_finish(&sls, do_sleep); > if ((flags & RW_INTR) && > - (error = sleep_finish_signal(&sls)) != 0) > + (error != 0)) > return (error); > if (flags & RW_SLEEPFAIL) > return (EAGAIN); > Index: kern/kern_sched.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sched.c,v > retrieving revision 1.68 > diff -u -p -r1.68 kern_sched.c > --- kern/kern_sched.c 9 Jan 2021 20:57:46 -0000 1.68 > +++ kern/kern_sched.c 26 Jan 2021 17:20:11 -0000 > @@ -674,7 +674,7 @@ sched_stop_secondary_cpus(void) > if (CPU_IS_PRIMARY(ci)) > continue; > while ((spc->spc_schedflags & SPCF_HALTED) == 0) { > - sleep_setup(&sls, spc, PZERO, "schedstate"); > + sleep_setup(&sls, spc, PZERO, "schedstate", 0); > sleep_finish(&sls, > (spc->spc_schedflags & SPCF_HALTED) == 0); > } > Index: kern/kern_sig.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sig.c,v > retrieving revision 1.270 > diff -u -p -r1.270 kern_sig.c > --- kern/kern_sig.c 25 Dec 2020 12:59:52 -0000 1.270 > +++ kern/kern_sig.c 1 Feb 2021 15:23:41 -0000 > @@ -2106,7 +2106,7 @@ single_thread_wait(struct process *pr, i > /* wait until they're all suspended */ > wait = pr->ps_singlecount > 0; > while (wait) { > - sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend"); > + sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend", 0); > wait = pr->ps_singlecount > 0; > sleep_finish(&sls, wait); > if (!recheck) > Index: kern/kern_synch.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_synch.c,v > retrieving revision 1.174 > diff -u -p -r1.174 kern_synch.c > --- kern/kern_synch.c 11 Jan 2021 13:55:53 -0000 1.174 > +++ kern/kern_synch.c 26 Jan 2021 18:19:52 -0000 > @@ -66,7 +66,7 @@ > #include <sys/ktrace.h> > #endif > > -int sleep_signal_check(struct proc *); > +int sleep_signal_check(void); > int thrsleep(struct proc *, struct sys___thrsleep_args *); > int thrsleep_unlock(void *); > > @@ -155,11 +155,8 @@ tsleep(const volatile void *ident, int p > return (0); > } > > - sleep_setup(&sls, ident, priority, wmesg); > - sleep_setup_timeout(&sls, timo); > - sleep_setup_signal(&sls); > - > - return sleep_finish_all(&sls, 1); > + sleep_setup(&sls, ident, priority, wmesg, timo); > + return sleep_finish(&sls, 1); > } > > int > @@ -250,8 +247,7 @@ msleep(const volatile void *ident, struc > return (0); > } > > - sleep_setup(&sls, ident, priority, wmesg); > - sleep_setup_timeout(&sls, timo); > + sleep_setup(&sls, ident, priority, wmesg, timo); > > /* XXX - We need to make sure that the mutex doesn't > * unblock splsched. This can be made a bit more > @@ -261,9 +257,7 @@ msleep(const volatile void *ident, struc > MUTEX_OLDIPL(mtx) = splsched(); > mtx_leave(mtx); > /* signal may stop the process, release mutex before that */ > - sleep_setup_signal(&sls); > - > - error = sleep_finish_all(&sls, 1); > + error = sleep_finish(&sls, 1); > > if ((priority & PNORELOCK) == 0) { > mtx_enter(mtx); > @@ -313,14 +307,11 @@ rwsleep(const volatile void *ident, stru > rw_assert_anylock(rwl); > status = rw_status(rwl); > > - sleep_setup(&sls, ident, priority, wmesg); > - sleep_setup_timeout(&sls, timo); > + sleep_setup(&sls, ident, priority, wmesg, 0); This needs to be sleep_setup(&sls, ident, priority, wmesg, timo); > > rw_exit(rwl); > /* signal may stop the process, release rwlock before that */ > - sleep_setup_signal(&sls); > - > - error = sleep_finish_all(&sls, 1); > + error = sleep_finish(&sls, 1); > > if ((priority & PNORELOCK) == 0) > rw_enter(rwl, status); > @@ -353,7 +344,7 @@ rwsleep_nsec(const volatile void *ident, > > void > sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio, > - const char *wmesg) > + const char *wmesg, int timo) > { > struct proc *p = curproc; > > @@ -367,14 +358,12 @@ sleep_setup(struct sleep_state *sls, con > #endif > > sls->sls_catch = prio & PCATCH; > - sls->sls_do_sleep = 1; > sls->sls_locked = 0; > - sls->sls_sigerr = 0; > sls->sls_timeout = 0; > > /* > * The kernel has to be locked for signal processing. > - * This is done here and not in sleep_setup_signal() because > + * This is done here and not in sleep_finish() because > * KERNEL_LOCK() has to be taken before SCHED_LOCK(). > */ > if (sls->sls_catch != 0) { > @@ -391,35 +380,53 @@ sleep_setup(struct sleep_state *sls, con > p->p_slptime = 0; > p->p_slppri = prio & PRIMASK; > TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq); > -} > - > -int > -sleep_finish_all(struct sleep_state *sls, int do_sleep) > -{ > - int error, error1; > - > - sleep_finish(sls, do_sleep); > - error1 = sleep_finish_timeout(sls); > - error = sleep_finish_signal(sls); > > - /* Signal errors are higher priority than timeouts. */ > - if (error == 0 && error1 != 0) > - error = error1; > - > - return error; > + KASSERT((p->p_flag & P_TIMEOUT) == 0); > + if (timo) { > + sls->sls_timeout = 1; > + timeout_add(&p->p_sleep_to, timo); > + } > } > > -void > +int > sleep_finish(struct sleep_state *sls, int do_sleep) > { > struct proc *p = curproc; > + int error = 0, error1 = 0; > + > + if (sls->sls_catch != 0) { > + /* sleep_setup() has locked the kernel. */ > + KERNEL_ASSERT_LOCKED(); > + > + /* > + * We put ourselves on the sleep queue and start our > + * timeout before calling sleep_signal_check(), as we could > + * stop there, and a wakeup or a SIGCONT (or both) could > + * occur while we were stopped. A SIGCONT would cause > + * us to be marked as SSLEEP without resuming us, thus > + * we must be ready for sleep when sleep_signal_check() is > + * called. > + * If the wakeup happens while we're stopped, p->p_wchan > + * will be NULL upon return from sleep_signal_check(). In > + * that case we need to unwind immediately. > + */ > + atomic_setbits_int(&p->p_flag, P_SINTR); > + if ((error = sleep_signal_check()) != 0) { > + p->p_stat = SONPROC; > + sls->sls_catch = 0; > + do_sleep = 0; > + } else if (p->p_wchan == NULL) { > + sls->sls_catch = 0; > + do_sleep = 0; > + } > + } > > - if (sls->sls_do_sleep && do_sleep) { > + if (do_sleep) { > p->p_stat = SSLEEP; > p->p_ru.ru_nvcsw++; > SCHED_ASSERT_LOCKED(); > mi_switch(); > - } else if (!do_sleep) { > + } else { > unsleep(p); > } > > @@ -436,30 +443,11 @@ sleep_finish(struct sleep_state *sls, in > * we need to clear it before the ktrace. > */ > atomic_clearbits_int(&p->p_flag, P_SINTR); > -} > - > -void > -sleep_setup_timeout(struct sleep_state *sls, int timo) > -{ > - struct proc *p = curproc; > - > - KASSERT((p->p_flag & P_TIMEOUT) == 0); > - > - if (timo) { > - sls->sls_timeout = 1; > - timeout_add(&p->p_sleep_to, timo); > - } > -} > - > -int > -sleep_finish_timeout(struct sleep_state *sls) > -{ > - struct proc *p = curproc; > > if (sls->sls_timeout) { > if (p->p_flag & P_TIMEOUT) { > atomic_clearbits_int(&p->p_flag, P_TIMEOUT); > - return (EWOULDBLOCK); > + error1 = EWOULDBLOCK; > } else { > /* This must not sleep. */ > timeout_del_barrier(&p->p_sleep_to); > @@ -467,78 +455,18 @@ sleep_finish_timeout(struct sleep_state > } > } > > - return (0); > -} > - > -int > -sleep_signal_check(struct proc *p) > -{ > - int err, sig; > - > - if ((err = single_thread_check(p, 1)) != 0) > - return err; > - if ((sig = CURSIG(p)) != 0) { > - if (p->p_p->ps_sigacts->ps_sigintr & sigmask(sig)) > - return EINTR; > - else > - return ERESTART; > - } > - return 0; > -} > - > -void > -sleep_setup_signal(struct sleep_state *sls) > -{ > - struct proc *p = curproc; > - > - if (sls->sls_catch == 0) > - return; > - > - /* sleep_setup() has locked the kernel. */ > - KERNEL_ASSERT_LOCKED(); > - > - /* > - * We put ourselves on the sleep queue and start our timeout before > - * calling single_thread_check or CURSIG, as we could stop there, and > - * a wakeup or a SIGCONT (or both) could occur while we were stopped. > - * A SIGCONT would cause us to be marked as SSLEEP without resuming us, > - * thus we must be ready for sleep when CURSIG is called. If the > - * wakeup happens while we're stopped, p->p_wchan will be 0 upon > - * return from single_thread_check or CURSIG. In that case we should > - * not go to sleep. If single_thread_check returns an error we need > - * to unwind immediately. That's achieved by saving the return value > - * in sls->sl_unwind and checking it later in sleep_finish_signal. > - */ > - atomic_setbits_int(&p->p_flag, P_SINTR); > - if ((sls->sls_sigerr = sleep_signal_check(p)) != 0) { > - unsleep(p); > - p->p_stat = SONPROC; > - sls->sls_do_sleep = 0; > - } else if (p->p_wchan == 0) { > - sls->sls_catch = 0; > - sls->sls_do_sleep = 0; > - } > -} > - > -int > -sleep_finish_signal(struct sleep_state *sls) > -{ > - struct proc *p = curproc; > - int error = 0; > - > - if (sls->sls_catch != 0) { > - KERNEL_ASSERT_LOCKED(); > - > - if (sls->sls_sigerr != 0) > - error = sls->sls_sigerr; > - else > - error = sleep_signal_check(p); > - } > + /* Check if thread was woken up because of a unwind or signal */ > + if (sls->sls_catch != 0) > + error = sleep_signal_check(); > > if (sls->sls_locked) > KERNEL_UNLOCK(); > > - return (error); > + /* Signal errors are higher priority than timeouts. */ > + if (error == 0 && error1 != 0) > + error = error1; > + > + return error; > } > > int > @@ -560,6 +488,27 @@ wakeup_proc(struct proc *p, const volati > return awakened; > } > > + > +/* > + * Check and handle signals and suspensions around a sleep cycle. > + */ > +int > +sleep_signal_check(void) > +{ > + struct proc *p = curproc; > + int err, sig; > + > + if ((err = single_thread_check(p, 1)) != 0) > + return err; > + if ((sig = CURSIG(p)) != 0) { > + if (p->p_p->ps_sigacts->ps_sigintr & sigmask(sig)) > + return EINTR; > + else > + return ERESTART; > + } > + return 0; > +} > + I would prefer if this function is above wakeup_proc() so that all sleep_* functions are together. > /* > * Implement timeout for tsleep. > * If process hasn't been awakened (wchan non-zero), > @@ -895,7 +844,7 @@ refcnt_finalize(struct refcnt *r, const > > refcnt = atomic_dec_int_nv(&r->refs); > while (refcnt) { > - sleep_setup(&sls, r, PWAIT, wmesg); > + sleep_setup(&sls, r, PWAIT, wmesg, 0); > refcnt = r->refs; > sleep_finish(&sls, refcnt); > } > @@ -923,7 +872,7 @@ cond_wait(struct cond *c, const char *wm > > wait = c->c_wait; > while (wait) { > - sleep_setup(&sls, c, PWAIT, wmesg); > + sleep_setup(&sls, c, PWAIT, wmesg, 0); > wait = c->c_wait; > sleep_finish(&sls, wait); > } > Index: kern/kern_timeout.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_timeout.c,v > retrieving revision 1.82 > diff -u -p -r1.82 kern_timeout.c > --- kern/kern_timeout.c 20 Oct 2020 22:37:12 -0000 1.82 > +++ kern/kern_timeout.c 26 Jan 2021 17:20:11 -0000 > @@ -787,7 +787,7 @@ softclock_thread(void *arg) > > s = splsoftclock(); > for (;;) { > - sleep_setup(&sls, &timeout_proc, PSWP, "bored"); > + sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0); > sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc)); > > mtx_enter(&timeout_mutex); > Index: kern/subr_log.c > =================================================================== > RCS file: /cvs/src/sys/kern/subr_log.c,v > retrieving revision 1.71 > diff -u -p -r1.71 subr_log.c > --- kern/subr_log.c 8 Jan 2021 11:23:57 -0000 1.71 > +++ kern/subr_log.c 26 Jan 2021 17:20:11 -0000 > @@ -246,10 +246,8 @@ logread(dev_t dev, struct uio *uio, int > * Set up and enter sleep manually instead of using msleep() > * to keep log_mtx as a leaf lock. > */ > - sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog"); > - sleep_setup_signal(&sls); > - sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT); > - error = sleep_finish_signal(&sls); > + sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog", 0); > + error = sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT); > mtx_enter(&log_mtx); > if (error) > goto out; > Index: sys/proc.h > =================================================================== > RCS file: /cvs/src/sys/sys/proc.h,v > retrieving revision 1.306 > diff -u -p -r1.306 proc.h > --- sys/proc.h 18 Jan 2021 18:47:05 -0000 1.306 > +++ sys/proc.h 26 Jan 2021 17:21:09 -0000 > @@ -625,9 +625,7 @@ int proc_cansugid(struct proc *); > struct sleep_state { > int sls_s; > int sls_catch; > - int sls_do_sleep; > int sls_locked; > - int sls_sigerr; > int sls_timeout; > }; > > Index: sys/systm.h > =================================================================== > RCS file: /cvs/src/sys/sys/systm.h,v > retrieving revision 1.150 > diff -u -p -r1.150 systm.h > --- sys/systm.h 27 Dec 2020 11:38:35 -0000 1.150 > +++ sys/systm.h 26 Jan 2021 17:20:11 -0000 > @@ -257,13 +257,8 @@ void stop_periodic_resettodr(void); > > struct sleep_state; > void sleep_setup(struct sleep_state *, const volatile void *, int, > - const char *); > -void sleep_setup_timeout(struct sleep_state *, int); > -void sleep_setup_signal(struct sleep_state *); > -void sleep_finish(struct sleep_state *, int); > -int sleep_finish_timeout(struct sleep_state *); > -int sleep_finish_signal(struct sleep_state *); > -int sleep_finish_all(struct sleep_state *, int); > + const char *, int); > +int sleep_finish(struct sleep_state *, int); > void sleep_queue_init(void); > > struct cond; > With this fix it works for me now. -- :wq Claudio