When a thread running on a CPU schedules itself out, it does the following
(pseudo_code):

        SCHED_LOCK()
        curproc->p_stat = SSLEEP;
        // some more operations
        mi_switch()

The problem with this is that any instrumentation between setting `p_stat'
and cpu_switchto() is incorrect because 'curproc' is still being executed
and is not yet sleeping.  Its `p_stat' should be SONPROC and not SSLEEP.

It is possible to reproduce the problem with the following btrace(8) script:

  tracepoint:sched:enqueue { printf("%d -> enqueue (%d)\n", arg0, arg1); }
  tracepoint:sched:dequeue { printf("%d <- dequeue (%d)\n", arg0, arg1); }
  tracepoint:sched:on__cpu { printf("%d -- on cpu (%d)\n", tid, pid); }

At which point the KASSERT() in wakeup_n() triggers if `curproc' is going to
sleep and its sleep channel collides with the running btrace(8) program:

  dt_prov_static_hook() at dt_prov_static_hook+0xe4
  remrunqueue() at remrunqueue+0x1a4
  sched_chooseproc() at sched_chooseproc+0x200
  mi_switch() at mi_switch+0x178
  sleep_finish() at sleep_finish+0x1d0
  tsleep() at tsleep+0x100
  biowait() at biowait+0x4c
  ffs_read() at ffs_read+0x1c0
  VOP_READ() at VOP_READ+0x44
  vn_read() at vn_read+0x84
  dofilereadv() at dofilereadv+0x8c
  sys_read() at sys_read+0x5c

To fix this we should set `p_stat' as late a possible, diff below does that
just before calling cpu_switchto().

Note that there's an exception for SRUN because setrunqueue() change `p_stat'
to indicate the thread is on a queue.  I'll discuss that in an upcoming diff.

ok?

Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.73
diff -u -p -r1.73 kern_sched.c
--- kern/kern_sched.c   9 Sep 2021 18:41:39 -0000       1.73
+++ kern/kern_sched.c   2 Oct 2021 17:00:52 -0000
@@ -144,10 +144,9 @@ sched_idle(void *v)
         */
        SCHED_LOCK(s);
        cpuset_add(&sched_idle_cpus, ci);
-       p->p_stat = SSLEEP;
        p->p_cpu = ci;
        atomic_setbits_int(&p->p_flag, P_CPUPEG);
-       mi_switch();
+       mi_switch(SSLEEP);
        cpuset_del(&sched_idle_cpus, ci);
        SCHED_UNLOCK(s);
 
@@ -159,8 +158,7 @@ sched_idle(void *v)
                        struct proc *dead;
 
                        SCHED_LOCK(s);
-                       p->p_stat = SSLEEP;
-                       mi_switch();
+                       mi_switch(SSLEEP);
                        SCHED_UNLOCK(s);
 
                        while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
@@ -625,7 +623,7 @@ sched_peg_curproc(struct cpu_info *ci)
        atomic_setbits_int(&p->p_flag, P_CPUPEG);
        setrunqueue(ci, p, p->p_usrpri);
        p->p_ru.ru_nvcsw++;
-       mi_switch();
+       mi_switch(SRUN);
        SCHED_UNLOCK(s);
 }
 
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.179
diff -u -p -r1.179 kern_synch.c
--- kern/kern_synch.c   9 Sep 2021 18:41:39 -0000       1.179
+++ kern/kern_synch.c   2 Oct 2021 17:00:52 -0000
@@ -421,10 +421,9 @@ sleep_finish(struct sleep_state *sls, in
        }
 
        if (do_sleep) {
-               p->p_stat = SSLEEP;
                p->p_ru.ru_nvcsw++;
                SCHED_ASSERT_LOCKED();
-               mi_switch();
+               mi_switch(SSLEEP);
        } else {
                unsleep(p);
        }
@@ -603,7 +602,7 @@ sys_sched_yield(struct proc *p, void *v,
                newprio = max(newprio, q->p_runpri);
        setrunqueue(p->p_cpu, p, newprio);
        p->p_ru.ru_nvcsw++;
-       mi_switch();
+       mi_switch(SRUN);
        SCHED_UNLOCK(s);
 
        return (0);
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.283
diff -u -p -r1.283 kern_sig.c
--- kern/kern_sig.c     28 Sep 2021 10:00:18 -0000      1.283
+++ kern/kern_sig.c     2 Oct 2021 17:00:52 -0000
@@ -1347,7 +1347,6 @@ proc_stop(struct proc *p, int sw)
        SCHED_ASSERT_LOCKED();
 #endif
 
-       p->p_stat = SSTOP;
        atomic_clearbits_int(&pr->ps_flags, PS_WAITED);
        atomic_setbits_int(&pr->ps_flags, PS_STOPPED);
        atomic_setbits_int(&p->p_flag, P_SUSPSIG);
@@ -1357,7 +1356,7 @@ proc_stop(struct proc *p, int sw)
         */
        softintr_schedule(proc_stop_si);
        if (sw)
-               mi_switch();
+               mi_switch(SSTOP);
 }
 
 /*
@@ -1979,8 +1978,7 @@ single_thread_check_locked(struct proc *
                        }
 
                        /* not exiting and don't need to unwind, so suspend */
-                       p->p_stat = SSTOP;
-                       mi_switch();
+                       mi_switch(SSTOP);
                } while (pr->ps_single != NULL);
        }
 
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.69
diff -u -p -r1.69 sched_bsd.c
--- kern/sched_bsd.c    9 Sep 2021 18:41:39 -0000       1.69
+++ kern/sched_bsd.c    2 Oct 2021 17:01:26 -0000
@@ -304,7 +304,7 @@ yield(void)
        SCHED_LOCK(s);
        setrunqueue(p->p_cpu, p, p->p_usrpri);
        p->p_ru.ru_nvcsw++;
-       mi_switch();
+       mi_switch(SRUN);
        SCHED_UNLOCK(s);
 }
 
@@ -323,12 +323,12 @@ preempt(void)
        SCHED_LOCK(s);
        setrunqueue(p->p_cpu, p, p->p_usrpri);
        p->p_ru.ru_nivcsw++;
-       mi_switch();
+       mi_switch(SRUN);
        SCHED_UNLOCK(s);
 }
 
 void
-mi_switch(void)
+mi_switch(uint8_t pstat)
 {
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
        struct proc *p = curproc;
@@ -341,7 +341,7 @@ mi_switch(void)
 #endif
 
        assertwaitok();
-       KASSERT(p->p_stat != SONPROC);
+       KASSERT(pstat != SONPROC);
 
        SCHED_ASSERT_LOCKED();
 
@@ -389,6 +389,7 @@ mi_switch(void)
                uvmexp.swtch++;
                TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
                    nextproc->p_p->ps_pid);
+               p->p_stat = pstat;
                cpu_switchto(p, nextproc);
                TRACEPOINT(sched, on__cpu, NULL);
        } else {
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.57
diff -u -p -r1.57 sched.h
--- sys/sched.h 25 Dec 2020 12:49:31 -0000      1.57
+++ sys/sched.h 2 Oct 2021 17:00:52 -0000
@@ -155,7 +155,7 @@ void userret(struct proc *p);
 void sched_init_cpu(struct cpu_info *);
 void sched_idle(void *);
 void sched_exit(struct proc *);
-void mi_switch(void);
+void mi_switch(uint8_t);
 void cpu_switchto(struct proc *, struct proc *);
 struct proc *sched_chooseproc(void);
 struct cpu_info *sched_choosecpu(struct proc *);

Reply via email to