> Date: Sat, 2 Oct 2021 19:55:49 +0200
> From: Martin Pieuchot <m...@openbsd.org>
> 
> When a thread running on a CPU schedules itself out, it does the following
> (pseudo_code):
> 
>       SCHED_LOCK()
>       curproc->p_stat = SSLEEP;
>       // some more operations
>       mi_switch()
> 
> The problem with this is that any instrumentation between setting `p_stat'
> and cpu_switchto() is incorrect because 'curproc' is still being executed
> and is not yet sleeping.  Its `p_stat' should be SONPROC and not SSLEEP.

Hmm, well, we're holding the scheduler lock, so nothing should really
look at our state at this point...

> It is possible to reproduce the problem with the following btrace(8) script:
> 
>   tracepoint:sched:enqueue { printf("%d -> enqueue (%d)\n", arg0, arg1); }
>   tracepoint:sched:dequeue { printf("%d <- dequeue (%d)\n", arg0, arg1); }
>   tracepoint:sched:on__cpu { printf("%d -- on cpu (%d)\n", tid, pid); }
> 
> At which point the KASSERT() in wakeup_n() triggers if `curproc' is going to
> sleep and its sleep channel collides with the running btrace(8) program:
> 
>   dt_prov_static_hook() at dt_prov_static_hook+0xe4
>   remrunqueue() at remrunqueue+0x1a4
>   sched_chooseproc() at sched_chooseproc+0x200
>   mi_switch() at mi_switch+0x178
>   sleep_finish() at sleep_finish+0x1d0
>   tsleep() at tsleep+0x100
>   biowait() at biowait+0x4c
>   ffs_read() at ffs_read+0x1c0
>   VOP_READ() at VOP_READ+0x44
>   vn_read() at vn_read+0x84
>   dofilereadv() at dofilereadv+0x8c
>   sys_read() at sys_read+0x5c

which suggests that something fishy is going on here.  Did we
accidentally introduce a sleeping point in the scheduler?

> To fix this we should set `p_stat' as late a possible, diff below does that
> just before calling cpu_switchto().
> 
> Note that there's an exception for SRUN because setrunqueue() change `p_stat'
> to indicate the thread is on a queue.  I'll discuss that in an upcoming diff.
> 
> ok?

I'm not necessarily against this diff, but it may hide bugs.  And...

> Index: kern/kern_sched.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 kern_sched.c
> --- kern/kern_sched.c 9 Sep 2021 18:41:39 -0000       1.73
> +++ kern/kern_sched.c 2 Oct 2021 17:00:52 -0000
> @@ -144,10 +144,9 @@ sched_idle(void *v)
>        */
>       SCHED_LOCK(s);
>       cpuset_add(&sched_idle_cpus, ci);
> -     p->p_stat = SSLEEP;
>       p->p_cpu = ci;
>       atomic_setbits_int(&p->p_flag, P_CPUPEG);
> -     mi_switch();
> +     mi_switch(SSLEEP);
>       cpuset_del(&sched_idle_cpus, ci);
>       SCHED_UNLOCK(s);
>  
> @@ -159,8 +158,7 @@ sched_idle(void *v)
>                       struct proc *dead;
>  
>                       SCHED_LOCK(s);
> -                     p->p_stat = SSLEEP;
> -                     mi_switch();
> +                     mi_switch(SSLEEP);
>                       SCHED_UNLOCK(s);
>  
>                       while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
> @@ -625,7 +623,7 @@ sched_peg_curproc(struct cpu_info *ci)
>       atomic_setbits_int(&p->p_flag, P_CPUPEG);
>       setrunqueue(ci, p, p->p_usrpri);
>       p->p_ru.ru_nvcsw++;
> -     mi_switch();
> +     mi_switch(SRUN);
>       SCHED_UNLOCK(s);
>  }
>  
> Index: kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.179
> diff -u -p -r1.179 kern_synch.c
> --- kern/kern_synch.c 9 Sep 2021 18:41:39 -0000       1.179
> +++ kern/kern_synch.c 2 Oct 2021 17:00:52 -0000
> @@ -421,10 +421,9 @@ sleep_finish(struct sleep_state *sls, in
>       }
>  
>       if (do_sleep) {
> -             p->p_stat = SSLEEP;
>               p->p_ru.ru_nvcsw++;
>               SCHED_ASSERT_LOCKED();
> -             mi_switch();
> +             mi_switch(SSLEEP);
>       } else {
>               unsleep(p);
>       }
> @@ -603,7 +602,7 @@ sys_sched_yield(struct proc *p, void *v,
>               newprio = max(newprio, q->p_runpri);
>       setrunqueue(p->p_cpu, p, newprio);
>       p->p_ru.ru_nvcsw++;
> -     mi_switch();
> +     mi_switch(SRUN);
>       SCHED_UNLOCK(s);
>  
>       return (0);
> Index: kern/kern_sig.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.283
> diff -u -p -r1.283 kern_sig.c
> --- kern/kern_sig.c   28 Sep 2021 10:00:18 -0000      1.283
> +++ kern/kern_sig.c   2 Oct 2021 17:00:52 -0000
> @@ -1347,7 +1347,6 @@ proc_stop(struct proc *p, int sw)
>       SCHED_ASSERT_LOCKED();
>  #endif
>  
> -     p->p_stat = SSTOP;
>       atomic_clearbits_int(&pr->ps_flags, PS_WAITED);
>       atomic_setbits_int(&pr->ps_flags, PS_STOPPED);
>       atomic_setbits_int(&p->p_flag, P_SUSPSIG);
> @@ -1357,7 +1356,7 @@ proc_stop(struct proc *p, int sw)
>        */
>       softintr_schedule(proc_stop_si);
>       if (sw)
> -             mi_switch();
> +             mi_switch(SSTOP);
>  }
>  
>  /*
> @@ -1979,8 +1978,7 @@ single_thread_check_locked(struct proc *
>                       }
>  
>                       /* not exiting and don't need to unwind, so suspend */
> -                     p->p_stat = SSTOP;
> -                     mi_switch();
> +                     mi_switch(SSTOP);
>               } while (pr->ps_single != NULL);
>       }
>  
> Index: kern/sched_bsd.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 sched_bsd.c
> --- kern/sched_bsd.c  9 Sep 2021 18:41:39 -0000       1.69
> +++ kern/sched_bsd.c  2 Oct 2021 17:01:26 -0000
> @@ -304,7 +304,7 @@ yield(void)
>       SCHED_LOCK(s);
>       setrunqueue(p->p_cpu, p, p->p_usrpri);
>       p->p_ru.ru_nvcsw++;
> -     mi_switch();
> +     mi_switch(SRUN);
>       SCHED_UNLOCK(s);
>  }
>  
> @@ -323,12 +323,12 @@ preempt(void)
>       SCHED_LOCK(s);
>       setrunqueue(p->p_cpu, p, p->p_usrpri);
>       p->p_ru.ru_nivcsw++;
> -     mi_switch();
> +     mi_switch(SRUN);
>       SCHED_UNLOCK(s);
>  }
>  
>  void
> -mi_switch(void)
> +mi_switch(uint8_t pstat)

...using uint8_t here seems weird; p_pstat is a char.

>  {
>       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
>       struct proc *p = curproc;
> @@ -341,7 +341,7 @@ mi_switch(void)
>  #endif
>  
>       assertwaitok();
> -     KASSERT(p->p_stat != SONPROC);
> +     KASSERT(pstat != SONPROC);
>  
>       SCHED_ASSERT_LOCKED();
>  
> @@ -389,6 +389,7 @@ mi_switch(void)
>               uvmexp.swtch++;
>               TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
>                   nextproc->p_p->ps_pid);
> +             p->p_stat = pstat;
>               cpu_switchto(p, nextproc);
>               TRACEPOINT(sched, on__cpu, NULL);
>       } else {
> Index: sys/sched.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sched.h,v
> retrieving revision 1.57
> diff -u -p -r1.57 sched.h
> --- sys/sched.h       25 Dec 2020 12:49:31 -0000      1.57
> +++ sys/sched.h       2 Oct 2021 17:00:52 -0000
> @@ -155,7 +155,7 @@ void userret(struct proc *p);
>  void sched_init_cpu(struct cpu_info *);
>  void sched_idle(void *);
>  void sched_exit(struct proc *);
> -void mi_switch(void);
> +void mi_switch(uint8_t);
>  void cpu_switchto(struct proc *, struct proc *);
>  struct proc *sched_chooseproc(void);
>  struct cpu_info *sched_choosecpu(struct proc *);
> 
> 

Reply via email to