Author: attilio
Date: Mon Jan 18 14:43:44 2010
New Revision: 202576
URL: http://svn.freebsd.org/changeset/base/202576

Log:
  MFC r200447,201703,201709-201710:
  In current code, threads performing an interruptible sleep
  will leave the waiters flag on forcing the owner to do a wakeup even
  when the waiter queue is empty.
  That operation may lead to a deadlock in the case of doing a fake wakeup
  on the "preferred" queue while the other queue has real waiters on it,
  because nobody is going to wakeup the 2nd queue waiters and they will
  sleep indefinitively.
  A similar bug, is present, for lockmgr in the case the waiters are
  sleeping with LK_SLEEPFAIL on.
  
  Add a sleepqueue interface which does report the actual number of waiters
  on a specified queue of a waitchannel and track if at least one sleepfail
  waiter is present or not. In presence of this or empty "preferred" queue,
  wakeup both waiters queues.
  
  Discussed with:       kib
  Tested by:    Pete French <petefrench at ticketswitch dot com>,
                Justin Head <justin at encarnate dot com>

Modified:
  stable/8/share/man/man9/sleepqueue.9
  stable/8/sys/kern/kern_lock.c
  stable/8/sys/kern/kern_sx.c
  stable/8/sys/kern/subr_sleepqueue.c
  stable/8/sys/sys/lockmgr.h
  stable/8/sys/sys/sleepqueue.h
Directory Properties:
  stable/8/share/man/man9/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/share/man/man9/sleepqueue.9
==============================================================================
--- stable/8/share/man/man9/sleepqueue.9        Mon Jan 18 14:32:38 2010        
(r202575)
+++ stable/8/share/man/man9/sleepqueue.9        Mon Jan 18 14:43:44 2010        
(r202576)
@@ -23,7 +23,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd December 12, 2009
+.Dd January 18, 2010
 .Dt SLEEPQUEUE 9
 .Os
 .Sh NAME
@@ -41,6 +41,7 @@
 .Nm sleepq_remove ,
 .Nm sleepq_signal ,
 .Nm sleepq_set_timeout ,
+.Nm sleepq_sleepcnt ,
 .Nm sleepq_timedwait ,
 .Nm sleepq_timedwait_sig ,
 .Nm sleepq_wait ,
@@ -77,6 +78,8 @@
 .Fn sleepq_signal "void *wchan" "int flags" "int pri" "int queue"
 .Ft void
 .Fn sleepq_set_timeout "void *wchan" "int timo"
+.Ft u_int
+.Fn sleepq_sleepcnt "void *wchan" "int queue"
 .Ft int
 .Fn sleepq_timedwait "void *wchan"
 .Ft int
@@ -355,6 +358,14 @@ One possible use is waking up a specific
 channel.
 .Pp
 The
+.Fn sleepq_sleepcnt
+function offer a simple way to retrieve the number of threads sleeping for
+the specified
+.Fa queue ,
+given a
+.Fa wchan .
+.Pp
+The
 .Fn sleepq_abort ,
 .Fn sleepq_broadcast ,
 and

Modified: stable/8/sys/kern/kern_lock.c
==============================================================================
--- stable/8/sys/kern/kern_lock.c       Mon Jan 18 14:32:38 2010        
(r202575)
+++ stable/8/sys/kern/kern_lock.c       Mon Jan 18 14:43:44 2010        
(r202576)
@@ -54,8 +54,8 @@ __FBSDID("$FreeBSD$");
 #include <ddb/ddb.h>
 #endif
 
-CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
-    (LK_ADAPTIVE | LK_NOSHARE));
+CTASSERT(((LK_ADAPTIVE | LK_EXSLPFAIL | LK_NOSHARE) & LO_CLASSFLAGS) ==
+    (LK_ADAPTIVE | LK_EXSLPFAIL | LK_NOSHARE));
 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
 
@@ -194,6 +194,14 @@ sleeplk(struct lock *lk, u_int flags, st
 
        if (flags & LK_INTERLOCK)
                class->lc_unlock(ilk);
+
+       /*
+        * LK_EXSLPFAIL is not invariant during the lock pattern but it is
+        * always protected by the sleepqueue spinlock, thus it is safe to
+        * handle within the lo_flags.
+        */
+       if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
+               lk->lock_object.lo_flags |= LK_EXSLPFAIL;
        GIANT_SAVE();
        sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
            SLEEPQ_INTERRUPTIBLE : 0), queue);
@@ -222,6 +230,7 @@ static __inline int
 wakeupshlk(struct lock *lk, const char *file, int line)
 {
        uintptr_t v, x;
+       u_int realexslp;
        int queue, wakeup_swapper;
 
        TD_LOCKS_DEC(curthread);
@@ -267,13 +276,45 @@ wakeupshlk(struct lock *lk, const char *
                /*
                 * If the lock has exclusive waiters, give them preference in
                 * order to avoid deadlock with shared runners up.
+                * If interruptible sleeps left the exclusive queue empty
+                * avoid a starvation for the threads sleeping on the shared
+                * queue by giving them precedence and cleaning up the
+                * exclusive waiters bit anyway.
+                * Please note that the LK_EXSLPFAIL flag may be lying about
+                * the real presence of waiters with the LK_SLEEPFAIL flag on
+                * because they may be used in conjuction with interruptible
+                * sleeps.
                 */
-               if (x & LK_EXCLUSIVE_WAITERS) {
-                       queue = SQ_EXCLUSIVE_QUEUE;
-                       v |= (x & LK_SHARED_WAITERS);
+               realexslp = sleepq_sleepcnt(&lk->lock_object,
+                   SQ_EXCLUSIVE_QUEUE);
+               if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
+                       if ((lk->lock_object.lo_flags & LK_EXSLPFAIL) == 0) {
+                               lk->lock_object.lo_flags &= ~LK_EXSLPFAIL;
+                               queue = SQ_EXCLUSIVE_QUEUE;
+                               v |= (x & LK_SHARED_WAITERS);
+                       } else {
+                               lk->lock_object.lo_flags &= ~LK_EXSLPFAIL;
+                               LOCK_LOG2(lk,
+                                   "%s: %p has only LK_SLEEPFAIL sleepers",
+                                   __func__, lk);
+                               LOCK_LOG2(lk,
+                           "%s: %p waking up threads on the exclusive queue",
+                                   __func__, lk);
+                               wakeup_swapper =
+                                   sleepq_broadcast(&lk->lock_object,
+                                   SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
+                               queue = SQ_SHARED_QUEUE;
+                       }
+                               
                } else {
-                       MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
-                           LK_SHARED_WAITERS);
+
+                       /*
+                        * Exclusive waiters sleeping with LK_SLEEPFAIL on
+                        * and using interruptible sleeps/timeout may have
+                        * left spourious LK_EXSLPFAIL flag on, so clean
+                        * it up anyway.
+                        */
+                       lk->lock_object.lo_flags &= ~LK_EXSLPFAIL;
                        queue = SQ_SHARED_QUEUE;
                }
 
@@ -285,7 +326,7 @@ wakeupshlk(struct lock *lk, const char *
                LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
                    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
                    "exclusive");
-               wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
+               wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
                    0, queue);
                sleepq_release(&lk->lock_object);
                break;
@@ -362,6 +403,8 @@ lockdestroy(struct lock *lk)
 
        KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
        KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
+       KASSERT((lk->lock_object.lo_flags & LK_EXSLPFAIL) == 0,
+           ("lockmgr still exclusive waiters"));
        lock_destroy(&lk->lock_object);
 }
 
@@ -373,7 +416,7 @@ __lockmgr_args(struct lock *lk, u_int fl
        struct lock_class *class;
        const char *iwmesg;
        uintptr_t tid, v, x;
-       u_int op;
+       u_int op, realexslp;
        int error, ipri, itimo, queue, wakeup_swapper;
 #ifdef LOCK_PROFILING
        uint64_t waittime = 0;
@@ -903,14 +946,48 @@ __lockmgr_args(struct lock *lk, u_int fl
                         * If the lock has exclusive waiters, give them
                         * preference in order to avoid deadlock with
                         * shared runners up.
+                        * If interruptible sleeps left the exclusive queue
+                        * empty avoid a starvation for the threads sleeping
+                        * on the shared queue by giving them precedence
+                        * and cleaning up the exclusive waiters bit anyway.
+                        * Please note that the LK_EXSLPFAIL flag may be lying
+                        * about the real presence of waiters with the
+                        * LK_SLEEPFAIL flag on because they may be used in
+                        * conjuction with interruptible sleeps.
                         */
                        MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
-                       if (x & LK_EXCLUSIVE_WAITERS) {
-                               queue = SQ_EXCLUSIVE_QUEUE;
-                               v |= (x & LK_SHARED_WAITERS);
+                       realexslp = sleepq_sleepcnt(&lk->lock_object,
+                           SQ_EXCLUSIVE_QUEUE);
+                       if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
+                               if ((lk->lock_object.lo_flags &
+                                   LK_EXSLPFAIL) == 0) {
+                                       lk->lock_object.lo_flags &=
+                                           ~LK_EXSLPFAIL;
+                                       queue = SQ_EXCLUSIVE_QUEUE;
+                                       v |= (x & LK_SHARED_WAITERS);
+                               } else {
+                                       lk->lock_object.lo_flags &=
+                                           ~LK_EXSLPFAIL;
+                                       LOCK_LOG2(lk,
+                                       "%s: %p has only LK_SLEEPFAIL sleepers",
+                                           __func__, lk);
+                                       LOCK_LOG2(lk,
+                       "%s: %p waking up threads on the exclusive queue",
+                                           __func__, lk);
+                                       wakeup_swapper =
+                                           sleepq_broadcast(&lk->lock_object,
+                                           SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
+                                       queue = SQ_SHARED_QUEUE;
+                               }
                        } else {
-                               MPASS((x & LK_ALL_WAITERS) ==
-                                   LK_SHARED_WAITERS);
+
+                               /*
+                                * Exclusive waiters sleeping with LK_SLEEPFAIL
+                                * on and using interruptible sleeps/timeout
+                                * may have left spourious LK_EXSLPFAIL flag
+                                * on, so clean it up anyway.
+                                */
+                               lk->lock_object.lo_flags &= ~LK_EXSLPFAIL;
                                queue = SQ_SHARED_QUEUE;
                        }
 
@@ -919,7 +996,7 @@ __lockmgr_args(struct lock *lk, u_int fl
                            __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
                            "exclusive");
                        atomic_store_rel_ptr(&lk->lk_lock, v);
-                       wakeup_swapper = sleepq_broadcast(&lk->lock_object,
+                       wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
                            SLEEPQ_LK, 0, queue);
                        sleepq_release(&lk->lock_object);
                        break;
@@ -976,14 +1053,64 @@ __lockmgr_args(struct lock *lk, u_int fl
                        v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
                        if ((x & ~v) == LK_UNLOCKED) {
                                v = (x & ~LK_EXCLUSIVE_SPINNERS);
+
+                               /*
+                                * If interruptible sleeps left the exclusive
+                                * queue empty avoid a starvation for the
+                                * threads sleeping on the shared queue by
+                                * giving them precedence and cleaning up the
+                                * exclusive waiters bit anyway.
+                                * Please note that the LK_EXSLPFAIL flag may
+                                * be lying about the real presence of waiters
+                                * with the LK_SLEEPFAIL flag on because they
+                                * may be used in conjuction with interruptible
+                                * sleeps.
+                                */
                                if (v & LK_EXCLUSIVE_WAITERS) {
                                        queue = SQ_EXCLUSIVE_QUEUE;
                                        v &= ~LK_EXCLUSIVE_WAITERS;
                                } else {
+
+                                       /*
+                                        * Exclusive waiters sleeping with
+                                        * LK_SLEEPFAIL on and using
+                                        * interruptible sleeps/timeout may
+                                        * have left spourious LK_EXSLPFAIL
+                                        * flag on, so clean it up anyway.
+                                        */
                                        MPASS(v & LK_SHARED_WAITERS);
+                                       lk->lock_object.lo_flags &=
+                                           ~LK_EXSLPFAIL;
                                        queue = SQ_SHARED_QUEUE;
                                        v &= ~LK_SHARED_WAITERS;
                                }
+                               if (queue == SQ_EXCLUSIVE_QUEUE) {
+                                       realexslp =
+                                           sleepq_sleepcnt(&lk->lock_object,
+                                           SQ_EXCLUSIVE_QUEUE);
+                                       if ((lk->lock_object.lo_flags &
+                                           LK_EXSLPFAIL) == 0) {
+                                               lk->lock_object.lo_flags &=
+                                                   ~LK_EXSLPFAIL;
+                                               queue = SQ_SHARED_QUEUE;
+                                               v &= ~LK_SHARED_WAITERS;
+                                               if (realexslp != 0) {
+                                                       LOCK_LOG2(lk,
+                                       "%s: %p has only LK_SLEEPFAIL sleepers",
+                                                           __func__, lk);
+                                                       LOCK_LOG2(lk,
+                       "%s: %p waking up threads on the exclusive queue",
+                                                           __func__, lk);
+                                                       wakeup_swapper =
+                                                           sleepq_broadcast(
+                                                           &lk->lock_object,
+                                                           SLEEPQ_LK, 0,
+                                                           SQ_EXCLUSIVE_QUEUE);
+                                               }
+                                       } else
+                                               lk->lock_object.lo_flags &=
+                                                   ~LK_EXSLPFAIL;
+                               }
                                if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
                                        sleepq_release(&lk->lock_object);
                                        continue;

Modified: stable/8/sys/kern/kern_sx.c
==============================================================================
--- stable/8/sys/kern/kern_sx.c Mon Jan 18 14:32:38 2010        (r202575)
+++ stable/8/sys/kern/kern_sx.c Mon Jan 18 14:43:44 2010        (r202576)
@@ -702,8 +702,12 @@ _sx_xunlock_hard(struct sx *sx, uintptr_
         * ideal.  It gives precedence to shared waiters if they are
         * present.  For this condition, we have to preserve the
         * state of the exclusive waiters flag.
+        * If interruptible sleeps left the shared queue empty avoid a
+        * starvation for the threads sleeping on the exclusive queue by giving
+        * them precedence and cleaning up the shared waiters bit anyway.
         */
-       if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
+       if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 &&
+           sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
                queue = SQ_SHARED_QUEUE;
                x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
        } else

Modified: stable/8/sys/kern/subr_sleepqueue.c
==============================================================================
--- stable/8/sys/kern/subr_sleepqueue.c Mon Jan 18 14:32:38 2010        
(r202575)
+++ stable/8/sys/kern/subr_sleepqueue.c Mon Jan 18 14:43:44 2010        
(r202576)
@@ -118,6 +118,7 @@ __FBSDID("$FreeBSD$");
  */
 struct sleepqueue {
        TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];    /* (c) Blocked threads. 
*/
+       u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
        LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
        LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
        void    *sq_wchan;                      /* (c) Wait channel. */
@@ -306,9 +307,12 @@ sleepq_add(void *wchan, struct lock_obje
                int i;
 
                sq = td->td_sleepqueue;
-               for (i = 0; i < NR_SLEEPQS; i++)
+               for (i = 0; i < NR_SLEEPQS; i++) {
                        KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
-                               ("thread's sleep queue %d is not empty", i));
+                           ("thread's sleep queue %d is not empty", i));
+                       KASSERT(sq->sq_blockedcnt[i] == 0,
+                           ("thread's sleep queue %d count mismatches", i));
+               }
                KASSERT(LIST_EMPTY(&sq->sq_free),
                    ("thread's sleep queue has a non-empty free list"));
                KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
@@ -334,6 +338,7 @@ sleepq_add(void *wchan, struct lock_obje
        }
        thread_lock(td);
        TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
+       sq->sq_blockedcnt[queue]++;
        td->td_sleepqueue = NULL;
        td->td_sqqueue = queue;
        td->td_wchan = wchan;
@@ -367,6 +372,22 @@ sleepq_set_timeout(void *wchan, int timo
 }
 
 /*
+ * Return the number of actual sleepers for the specified queue.
+ */
+u_int
+sleepq_sleepcnt(void *wchan, int queue)
+{
+       struct sleepqueue *sq;
+
+       KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
+       MPASS((queue >= 0) && (queue < NR_SLEEPQS));
+       sq = sleepq_lookup(wchan);
+       if (sq == NULL)
+               return (0);
+       return (sq->sq_blockedcnt[queue]);
+}
+
+/*
  * Marks the pending sleep of the current thread as interruptible and
  * makes an initial check for pending signals before putting a thread
  * to sleep. Enters and exits with the thread lock held.  Thread lock
@@ -665,6 +686,7 @@ sleepq_resume_thread(struct sleepqueue *
        mtx_assert(&sc->sc_lock, MA_OWNED);
 
        /* Remove the thread from the queue. */
+       sq->sq_blockedcnt[td->td_sqqueue]--;
        TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
 
        /*
@@ -720,8 +742,10 @@ sleepq_dtor(void *mem, int size, void *a
        int i;
 
        sq = mem;
-       for (i = 0; i < NR_SLEEPQS; i++)
+       for (i = 0; i < NR_SLEEPQS; i++) {
                MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
+               MPASS(sq->sq_blockedcnt[i] == 0);
+       }
 }
 #endif
 
@@ -736,8 +760,10 @@ sleepq_init(void *mem, int size, int fla
 
        bzero(mem, size);
        sq = mem;
-       for (i = 0; i < NR_SLEEPQS; i++)
+       for (i = 0; i < NR_SLEEPQS; i++) {
                TAILQ_INIT(&sq->sq_blocked[i]);
+               sq->sq_blockedcnt[i] = 0;
+       }
        LIST_INIT(&sq->sq_free);
        return (0);
 }
@@ -1170,6 +1196,7 @@ found:
                                          td->td_tid, td->td_proc->p_pid,
                                          td->td_name);
                        }
+               db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
        }
 }
 

Modified: stable/8/sys/sys/lockmgr.h
==============================================================================
--- stable/8/sys/sys/lockmgr.h  Mon Jan 18 14:32:38 2010        (r202575)
+++ stable/8/sys/sys/lockmgr.h  Mon Jan 18 14:43:44 2010        (r202576)
@@ -144,6 +144,9 @@ _lockmgr_args_rw(struct lock *lk, u_int 
 #define        LK_QUIET        0x000020
 #define        LK_ADAPTIVE     0x000040
 
+/* LK_EXSLPFAIL to follow, even if not used in lockinit() */
+#define        LK_EXSLPFAIL    0x000080
+
 /*
  * Additional attributes to be used in lockmgr().
  */

Modified: stable/8/sys/sys/sleepqueue.h
==============================================================================
--- stable/8/sys/sys/sleepqueue.h       Mon Jan 18 14:32:38 2010        
(r202575)
+++ stable/8/sys/sys/sleepqueue.h       Mon Jan 18 14:43:44 2010        
(r202576)
@@ -109,6 +109,7 @@ void        sleepq_release(void *wchan);
 void   sleepq_remove(struct thread *td, void *wchan);
 int    sleepq_signal(void *wchan, int flags, int pri, int queue);
 void   sleepq_set_timeout(void *wchan, int timo);
+u_int  sleepq_sleepcnt(void *wchan, int queue);
 int    sleepq_timedwait(void *wchan, int pri);
 int    sleepq_timedwait_sig(void *wchan, int pri);
 void   sleepq_wait(void *wchan, int pri);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to