Author: kib
Date: Thu Jul 28 09:09:55 2016
New Revision: 303426
URL: https://svnweb.freebsd.org/changeset/base/303426

Log:
  Rewrite subr_sleepqueue.c use of callouts to not depend on the
  specifics of callout KPI.  Esp., do not depend on the exact interface
  of callout_stop(9) return values.
  
  The main change is that instead of requiring precise callouts, code
  maintains absolute time to wake up.  Callouts now should ensure that a
  wake occurs at the requested moment, but we can tolerate both run-away
  callout, and callout_stop(9) lying about running callout either way.
  
  As consequence, it removes the constant source of the bugs where
  sleepq_check_timeout() causes uninterruptible thread state where the
  thread is detached from CPU, see e.g. r234952 and r296320.
  
  Patch also removes dual meaning of the TDF_TIMEOUT flag, making code
  (IMO much) simpler to reason about.
  
  Tested by:    pho
  Reviewed by:  jhb
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 month
  Differential revision:        https://reviews.freebsd.org/D7137

Modified:
  head/sys/ddb/db_ps.c
  head/sys/kern/kern_thread.c
  head/sys/kern/subr_sleepqueue.c
  head/sys/sys/proc.h

Modified: head/sys/ddb/db_ps.c
==============================================================================
--- head/sys/ddb/db_ps.c        Thu Jul 28 08:57:01 2016        (r303425)
+++ head/sys/ddb/db_ps.c        Thu Jul 28 09:09:55 2016        (r303426)
@@ -375,8 +375,13 @@ DB_SHOW_COMMAND(thread, db_show_thread)
                db_printf(" lock: %s  turnstile: %p\n", td->td_lockname,
                    td->td_blocked);
        if (TD_ON_SLEEPQ(td))
-               db_printf(" wmesg: %s  wchan: %p\n", td->td_wmesg,
-                   td->td_wchan);
+               db_printf(
+           " wmesg: %s  wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
+                   td->td_wmesg, td->td_wchan,
+                   (long)sbttobt(td->td_sleeptimo).sec,
+                   (uintmax_t)sbttobt(td->td_sleeptimo).frac,
+                   (long)sbttobt(sbinuptime()).sec,
+                   (uintmax_t)sbttobt(sbinuptime()).frac);
        db_printf(" priority: %d\n", td->td_priority);
        db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
        if (td->td_swvoltick != 0) {

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c Thu Jul 28 08:57:01 2016        (r303425)
+++ head/sys/kern/kern_thread.c Thu Jul 28 09:09:55 2016        (r303426)
@@ -318,7 +318,7 @@ thread_reap(void)
 
        /*
         * Don't even bother to lock if none at this instant,
-        * we really don't care about the next instant..
+        * we really don't care about the next instant.
         */
        if (!TAILQ_EMPTY(&zombie_threads)) {
                mtx_lock_spin(&zombie_lock);
@@ -383,6 +383,7 @@ thread_free(struct thread *td)
        if (td->td_kstack != 0)
                vm_thread_dispose(td);
        vm_domain_policy_cleanup(&td->td_vm_dom_policy);
+       callout_drain(&td->td_slpcallout);
        uma_zfree(thread_zone, td);
 }
 
@@ -580,6 +581,7 @@ thread_wait(struct proc *p)
        td->td_cpuset = NULL;
        cpu_thread_clean(td);
        thread_cow_free(td);
+       callout_drain(&td->td_slpcallout);
        thread_reap();  /* check for zombie threads etc. */
 }
 

Modified: head/sys/kern/subr_sleepqueue.c
==============================================================================
--- head/sys/kern/subr_sleepqueue.c     Thu Jul 28 08:57:01 2016        
(r303425)
+++ head/sys/kern/subr_sleepqueue.c     Thu Jul 28 09:09:55 2016        
(r303426)
@@ -378,6 +378,7 @@ sleepq_set_timeout_sbt(void *wchan, sbin
 {
        struct sleepqueue_chain *sc;
        struct thread *td;
+       sbintime_t pr1;
 
        td = curthread;
        sc = SC_LOOKUP(wchan);
@@ -387,8 +388,14 @@ sleepq_set_timeout_sbt(void *wchan, sbin
        MPASS(wchan != NULL);
        if (cold)
                panic("timed sleep before timers are working");
-       callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
-           sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+       KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
+           td->td_tid, td, (uintmax_t)td->td_sleeptimo));
+       thread_lock(td);
+       callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
+       thread_unlock(td);
+       callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
+           sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
+           C_DIRECT_EXEC);
 }
 
 /*
@@ -576,37 +583,36 @@ static int
 sleepq_check_timeout(void)
 {
        struct thread *td;
+       int res;
 
        td = curthread;
        THREAD_LOCK_ASSERT(td, MA_OWNED);
 
        /*
-        * If TDF_TIMEOUT is set, we timed out.
+        * If TDF_TIMEOUT is set, we timed out.  But recheck
+        * td_sleeptimo anyway.
         */
-       if (td->td_flags & TDF_TIMEOUT) {
-               td->td_flags &= ~TDF_TIMEOUT;
-               return (EWOULDBLOCK);
+       res = 0;
+       if (td->td_sleeptimo != 0) {
+               if (td->td_sleeptimo <= sbinuptime())
+                       res = EWOULDBLOCK;
+               td->td_sleeptimo = 0;
        }
-
-       /*
-        * If TDF_TIMOFAIL is set, the timeout ran after we had
-        * already been woken up.
-        */
-       if (td->td_flags & TDF_TIMOFAIL)
-               td->td_flags &= ~TDF_TIMOFAIL;
-
-       /*
-        * If callout_stop() fails, then the timeout is running on
-        * another CPU, so synchronize with it to avoid having it
-        * accidentally wake up a subsequent sleep.
-        */
-       else if (_callout_stop_safe(&td->td_slpcallout, CS_EXECUTING, NULL)
-           == 0) {
-               td->td_flags |= TDF_TIMEOUT;
-               TD_SET_SLEEPING(td);
-               mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
-       }
-       return (0);
+       if (td->td_flags & TDF_TIMEOUT)
+               td->td_flags &= ~TDF_TIMEOUT;
+       else
+               /*
+                * We ignore the situation where timeout subsystem was
+                * unable to stop our callout.  The struct thread is
+                * type-stable, the callout will use the correct
+                * memory when running.  The checks of the
+                * td_sleeptimo value in this function and in
+                * sleepq_timeout() ensure that the thread does not
+                * get spurious wakeups, even if the callout was reset
+                * or thread reused.
+                */
+               callout_stop(&td->td_slpcallout);
+       return (res);
 }
 
 /*
@@ -914,12 +920,17 @@ sleepq_timeout(void *arg)
        CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
            (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 
-       /*
-        * First, see if the thread is asleep and get the wait channel if
-        * it is.
-        */
        thread_lock(td);
-       if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+
+       if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
+               /*
+                * The thread does not want a timeout (yet).
+                */
+       } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+               /*
+                * See if the thread is asleep and get the wait
+                * channel if it is.
+                */
                wchan = td->td_wchan;
                sc = SC_LOOKUP(wchan);
                THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
@@ -927,40 +938,16 @@ sleepq_timeout(void *arg)
                MPASS(sq != NULL);
                td->td_flags |= TDF_TIMEOUT;
                wakeup_swapper = sleepq_resume_thread(sq, td, 0);
-               thread_unlock(td);
-               if (wakeup_swapper)
-                       kick_proc0();
-               return;
-       }
-
-       /*
-        * If the thread is on the SLEEPQ but isn't sleeping yet, it
-        * can either be on another CPU in between sleepq_add() and
-        * one of the sleepq_*wait*() routines or it can be in
-        * sleepq_catch_signals().
-        */
-       if (TD_ON_SLEEPQ(td)) {
+       } else if (TD_ON_SLEEPQ(td)) {
+               /*
+                * If the thread is on the SLEEPQ but isn't sleeping
+                * yet, it can either be on another CPU in between
+                * sleepq_add() and one of the sleepq_*wait*()
+                * routines or it can be in sleepq_catch_signals().
+                */
                td->td_flags |= TDF_TIMEOUT;
-               thread_unlock(td);
-               return;
        }
 
-       /*
-        * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
-        * then the other thread has already yielded to us, so clear
-        * the flag and resume it.  If TDF_TIMEOUT is not set, then the
-        * we know that the other thread is not on a sleep queue, but it
-        * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
-        * to let it know that the timeout has already run and doesn't
-        * need to be canceled.
-        */
-       if (td->td_flags & TDF_TIMEOUT) {
-               MPASS(TD_IS_SLEEPING(td));
-               td->td_flags &= ~TDF_TIMEOUT;
-               TD_CLR_SLEEPING(td);
-               wakeup_swapper = setrunnable(td);
-       } else
-               td->td_flags |= TDF_TIMOFAIL;
        thread_unlock(td);
        if (wakeup_swapper)
                kick_proc0();

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Thu Jul 28 08:57:01 2016        (r303425)
+++ head/sys/sys/proc.h Thu Jul 28 09:09:55 2016        (r303426)
@@ -282,6 +282,7 @@ struct thread {
        int             td_no_sleeping; /* (k) Sleeping disabled count. */
        int             td_dom_rr_idx;  /* (k) RR Numa domain selection. */
        void            *td_su;         /* (k) FFS SU private */
+       sbintime_t      td_sleeptimo;   /* (t) Sleep timeout. */
 #define        td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */
@@ -388,7 +389,7 @@ do {                                                        
                \
 #define        TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
 #define        TDF_BOUNDARY    0x00000400 /* Thread suspended at user boundary 
*/
 #define        TDF_ASTPENDING  0x00000800 /* Thread has some asynchronous 
events. */
-#define        TDF_TIMOFAIL    0x00001000 /* Timeout from sleep after we were 
awake. */
+#define        TDF_UNUSED12    0x00001000 /* --available-- */
 #define        TDF_SBDRY       0x00002000 /* Stop only on usermode boundary. */
 #define        TDF_UPIBLOCKED  0x00004000 /* Thread blocked on user PI mutex. 
*/
 #define        TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to