> Date: Sat, 2 Oct 2021 19:55:49 +0200 > From: Martin Pieuchot <m...@openbsd.org> > > When a thread running on a CPU schedules itself out, it does the following > (pseudo_code): > > SCHED_LOCK() > curproc->p_stat = SSLEEP; > // some more operations > mi_switch() > > The problem with this is that any instrumentation between setting `p_stat' > and cpu_switchto() is incorrect because 'curproc' is still being executed > and is not yet sleeping. Its `p_stat' should be SONPROC and not SSLEEP.
Hmm, well, we're holding the scheduler lock, so nothing should really look at our state at this point... > It is possible to reproduce the problem with the following btrace(8) script: > > tracepoint:sched:enqueue { printf("%d -> enqueue (%d)\n", arg0, arg1); } > tracepoint:sched:dequeue { printf("%d <- dequeue (%d)\n", arg0, arg1); } > tracepoint:sched:on__cpu { printf("%d -- on cpu (%d)\n", tid, pid); } > > At which point the KASSERT() in wakeup_n() triggers if `curproc' is going to > sleep and its sleep channel collides with the running btrace(8) program: > > dt_prov_static_hook() at dt_prov_static_hook+0xe4 > remrunqueue() at remrunqueue+0x1a4 > sched_chooseproc() at sched_chooseproc+0x200 > mi_switch() at mi_switch+0x178 > sleep_finish() at sleep_finish+0x1d0 > tsleep() at tsleep+0x100 > biowait() at biowait+0x4c > ffs_read() at ffs_read+0x1c0 > VOP_READ() at VOP_READ+0x44 > vn_read() at vn_read+0x84 > dofilereadv() at dofilereadv+0x8c > sys_read() at sys_read+0x5c which suggests that something fishy is going on here. Did we accidentally introduce a sleeping point in the scheduler? > To fix this we should set `p_stat' as late a possible, diff below does that > just before calling cpu_switchto(). > > Note that there's an exception for SRUN because setrunqueue() change `p_stat' > to indicate the thread is on a queue. I'll discuss that in an upcoming diff. > > ok? I'm not necessarily against this diff, but it may hide bugs. And... > Index: kern/kern_sched.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sched.c,v > retrieving revision 1.73 > diff -u -p -r1.73 kern_sched.c > --- kern/kern_sched.c 9 Sep 2021 18:41:39 -0000 1.73 > +++ kern/kern_sched.c 2 Oct 2021 17:00:52 -0000 > @@ -144,10 +144,9 @@ sched_idle(void *v) > */ > SCHED_LOCK(s); > cpuset_add(&sched_idle_cpus, ci); > - p->p_stat = SSLEEP; > p->p_cpu = ci; > atomic_setbits_int(&p->p_flag, P_CPUPEG); > - mi_switch(); > + mi_switch(SSLEEP); > cpuset_del(&sched_idle_cpus, ci); > SCHED_UNLOCK(s); > > @@ -159,8 +158,7 @@ sched_idle(void *v) > struct proc *dead; > > SCHED_LOCK(s); > - p->p_stat = SSLEEP; > - mi_switch(); > + mi_switch(SSLEEP); > SCHED_UNLOCK(s); > > while ((dead = LIST_FIRST(&spc->spc_deadproc))) { > @@ -625,7 +623,7 @@ sched_peg_curproc(struct cpu_info *ci) > atomic_setbits_int(&p->p_flag, P_CPUPEG); > setrunqueue(ci, p, p->p_usrpri); > p->p_ru.ru_nvcsw++; > - mi_switch(); > + mi_switch(SRUN); > SCHED_UNLOCK(s); > } > > Index: kern/kern_synch.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_synch.c,v > retrieving revision 1.179 > diff -u -p -r1.179 kern_synch.c > --- kern/kern_synch.c 9 Sep 2021 18:41:39 -0000 1.179 > +++ kern/kern_synch.c 2 Oct 2021 17:00:52 -0000 > @@ -421,10 +421,9 @@ sleep_finish(struct sleep_state *sls, in > } > > if (do_sleep) { > - p->p_stat = SSLEEP; > p->p_ru.ru_nvcsw++; > SCHED_ASSERT_LOCKED(); > - mi_switch(); > + mi_switch(SSLEEP); > } else { > unsleep(p); > } > @@ -603,7 +602,7 @@ sys_sched_yield(struct proc *p, void *v, > newprio = max(newprio, q->p_runpri); > setrunqueue(p->p_cpu, p, newprio); > p->p_ru.ru_nvcsw++; > - mi_switch(); > + mi_switch(SRUN); > SCHED_UNLOCK(s); > > return (0); > Index: kern/kern_sig.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sig.c,v > retrieving revision 1.283 > diff -u -p -r1.283 kern_sig.c > --- kern/kern_sig.c 28 Sep 2021 10:00:18 -0000 1.283 > +++ kern/kern_sig.c 2 Oct 2021 17:00:52 -0000 > @@ -1347,7 +1347,6 @@ proc_stop(struct proc *p, int sw) > SCHED_ASSERT_LOCKED(); > #endif > > - p->p_stat = SSTOP; > atomic_clearbits_int(&pr->ps_flags, PS_WAITED); > atomic_setbits_int(&pr->ps_flags, PS_STOPPED); > atomic_setbits_int(&p->p_flag, P_SUSPSIG); > @@ -1357,7 +1356,7 @@ proc_stop(struct proc *p, int sw) > */ > softintr_schedule(proc_stop_si); > if (sw) > - mi_switch(); > + mi_switch(SSTOP); > } > > /* > @@ -1979,8 +1978,7 @@ single_thread_check_locked(struct proc * > } > > /* not exiting and don't need to unwind, so suspend */ > - p->p_stat = SSTOP; > - mi_switch(); > + mi_switch(SSTOP); > } while (pr->ps_single != NULL); > } > > Index: kern/sched_bsd.c > =================================================================== > RCS file: /cvs/src/sys/kern/sched_bsd.c,v > retrieving revision 1.69 > diff -u -p -r1.69 sched_bsd.c > --- kern/sched_bsd.c 9 Sep 2021 18:41:39 -0000 1.69 > +++ kern/sched_bsd.c 2 Oct 2021 17:01:26 -0000 > @@ -304,7 +304,7 @@ yield(void) > SCHED_LOCK(s); > setrunqueue(p->p_cpu, p, p->p_usrpri); > p->p_ru.ru_nvcsw++; > - mi_switch(); > + mi_switch(SRUN); > SCHED_UNLOCK(s); > } > > @@ -323,12 +323,12 @@ preempt(void) > SCHED_LOCK(s); > setrunqueue(p->p_cpu, p, p->p_usrpri); > p->p_ru.ru_nivcsw++; > - mi_switch(); > + mi_switch(SRUN); > SCHED_UNLOCK(s); > } > > void > -mi_switch(void) > +mi_switch(uint8_t pstat) ...using uint8_t here seems weird; p_pstat is a char. > { > struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > struct proc *p = curproc; > @@ -341,7 +341,7 @@ mi_switch(void) > #endif > > assertwaitok(); > - KASSERT(p->p_stat != SONPROC); > + KASSERT(pstat != SONPROC); > > SCHED_ASSERT_LOCKED(); > > @@ -389,6 +389,7 @@ mi_switch(void) > uvmexp.swtch++; > TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, > nextproc->p_p->ps_pid); > + p->p_stat = pstat; > cpu_switchto(p, nextproc); > TRACEPOINT(sched, on__cpu, NULL); > } else { > Index: sys/sched.h > =================================================================== > RCS file: /cvs/src/sys/sys/sched.h,v > retrieving revision 1.57 > diff -u -p -r1.57 sched.h > --- sys/sched.h 25 Dec 2020 12:49:31 -0000 1.57 > +++ sys/sched.h 2 Oct 2021 17:00:52 -0000 > @@ -155,7 +155,7 @@ void userret(struct proc *p); > void sched_init_cpu(struct cpu_info *); > void sched_idle(void *); > void sched_exit(struct proc *); > -void mi_switch(void); > +void mi_switch(uint8_t); > void cpu_switchto(struct proc *, struct proc *); > struct proc *sched_chooseproc(void); > struct cpu_info *sched_choosecpu(struct proc *); > >