Author: attilio
Date: Sat Jan  9 01:46:38 2010
New Revision: 201879
URL: http://svn.freebsd.org/changeset/base/201879

Log:
  Introduce the new kernel thread called "deadlock resolver".
  While the name is pretentious, a good explanation of its goals is
  given in this 17-month-old introductory e-mail:
  http://lists.freebsd.org/pipermail/freebsd-arch/2008-August/008452.html
  
  In order to implement it, the sq_type field in sleepqueues is now always
  compiled in, rather than only with the INVARIANTS option. Additionally, a
  new sleepqueue function, sleepq_type(), is added, returning the type of
  the sleepqueue linked to a wchan.
  Three new sysctls are added in order to configure the thread:
  debug.deadlkres.slptime_threshold
  debug.deadlkres.blktime_threshold
  debug.deadlkres.sleepfreq
  
  representing the thresholds for sleep and block time that, when exceeded,
  will be treated as a deadlock, while sleepfreq represents the number of
  seconds between two consecutive runs of the thread.
  In order to enable the deadlock resolver thread, recompile your kernel
  with the option DEADLKRES.
  
  Reviewed by:  jeff
  Tested by:    pho, Giovanni Trematerra
  Sponsored by: Nokia Incorporated, Sandvine Incorporated
  MFC after:    2 weeks

Modified:
  head/UPDATING
  head/share/man/man9/sleepqueue.9
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/kern/kern_clock.c
  head/sys/kern/subr_sleepqueue.c
  head/sys/kern/subr_turnstile.c
  head/sys/sys/proc.h
  head/sys/sys/sleepqueue.h

Modified: head/UPDATING
==============================================================================
--- head/UPDATING       Sat Jan  9 01:20:01 2010        (r201878)
+++ head/UPDATING       Sat Jan  9 01:46:38 2010        (r201879)
@@ -22,6 +22,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.
        machines to maximize performance.  (To disable malloc debugging, run
        ln -s aj /etc/malloc.conf.)
 
+20100108:
+       Introduce the kernel thread "deadlock resolver" (which can be enabled
+       via the DEADLKRES option, see NOTES for more details) and the
+       sleepq_type() function for sleepqueues.
+
 20091202:
        The rc.firewall and rc.firewall6 were unified, and
        rc.firewall6 and rc.d/ip6fw were removed.

Modified: head/share/man/man9/sleepqueue.9
==============================================================================
--- head/share/man/man9/sleepqueue.9    Sat Jan  9 01:20:01 2010        (r201878)
+++ head/share/man/man9/sleepqueue.9    Sat Jan  9 01:46:38 2010        (r201879)
@@ -23,7 +23,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd December 12, 2009
+.Dd January 8, 2010
 .Dt SLEEPQUEUE 9
 .Os
 .Sh NAME
@@ -44,6 +44,7 @@
 .Nm sleepq_sleepcnt ,
 .Nm sleepq_timedwait ,
 .Nm sleepq_timedwait_sig ,
+.Nm sleepq_type ,
 .Nm sleepq_wait ,
 .Nm sleepq_wait_sig
 .Nd manage the queues of sleeping threads
@@ -84,6 +85,8 @@
 .Fn sleepq_timedwait "void *wchan"
 .Ft int
 .Fn sleepq_timedwait_sig "void *wchan" "int signal_caught"
+.Ft int
+.Fn sleepq_type "void *wchan"
 .Ft void
 .Fn sleepq_wait "void *wchan"
 .Ft int
@@ -366,6 +369,12 @@ given a
 .Fa wchan .
 .Pp
 The
+.Fn sleepq_type
+function returns the type of
+.Fa wchan
+associated to a sleepqueue.
+.Pp
+The
 .Fn sleepq_abort ,
 .Fn sleepq_broadcast ,
 and

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/conf/NOTES Sat Jan  9 01:46:38 2010        (r201879)
@@ -2531,6 +2531,11 @@ options  BOOTP_BLOCKSIZE=8192 # Override
 options        SW_WATCHDOG
 
 #
+# Add the software deadlock resolver thread.
+#
+options                DEADLKRES
+
+#
 # Disable swapping of stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options       Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/conf/options       Sat Jan  9 01:46:38 2010        (r201879)
@@ -72,6 +72,7 @@ COMPAT_FREEBSD6       opt_compat.h
 COMPAT_FREEBSD7        opt_compat.h
 COMPILING_LINT opt_global.h
 CY_PCI_FASTINTR
+DEADLKRES      opt_watchdog.h
 DIRECTIO
 FULL_PREEMPTION        opt_sched.h
 IPI_PREEMPTION opt_sched.h

Modified: head/sys/kern/kern_clock.c
==============================================================================
--- head/sys/kern/kern_clock.c  Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/kern/kern_clock.c  Sat Jan  9 01:46:38 2010        (r201879)
@@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/callout.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
-#include <sys/lock.h>
+#include <sys/kthread.h>
 #include <sys/ktr.h>
+#include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
+#include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
@@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
     0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
 
+#ifdef DEADLKRES
+static int slptime_threshold = 1800;
+static int blktime_threshold = 900;
+static int sleepfreq = 3;
+
+static void
+deadlkres(void)
+{
+       struct proc *p;
+       struct thread *td;
+       void *wchan;
+       int blkticks, slpticks, slptype, tryl, tticks;
+
+       tryl = 0;
+       for (;;) {
+               blkticks = blktime_threshold * hz;
+               slpticks = slptime_threshold * hz;
+
+               /*
+                * Avoid to sleep on the sx_lock in order to avoid a possible
+                * priority inversion problem leading to starvation.
+                * If the lock can't be held after 100 tries, panic.
+                */
+               if (!sx_try_slock(&allproc_lock)) {
+                       if (tryl > 100)
+               panic("%s: possible deadlock detected on allproc_lock\n",
+                                   __func__);
+                       tryl++;
+                       pause("allproc_lock deadlkres", sleepfreq * hz);
+                       continue;
+               }
+               tryl = 0;
+               FOREACH_PROC_IN_SYSTEM(p) {
+                       PROC_LOCK(p);
+                       FOREACH_THREAD_IN_PROC(p, td) {
+                               thread_lock(td);
+                               if (TD_ON_LOCK(td)) {
+
+                                       /*
+                                        * The thread should be blocked on a
+                                        * turnstile, simply check if the
+                                        * turnstile channel is in good state.
+                                        */
+                                       MPASS(td->td_blocked != NULL);
+                                       tticks = ticks - td->td_blktick;
+                                       thread_unlock(td);
+                                       if (tticks > blkticks) {
+
+                                               /*
+                                                * Accordingly with provided
+                                                * thresholds, this thread is
+                                                * stuck for too long on a
+                                                * turnstile.
+                                                */
+                                               PROC_UNLOCK(p);
+                                               sx_sunlock(&allproc_lock);
+       panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+                                                   __func__, td, tticks);
+                                       }
+                               } else if (TD_IS_SLEEPING(td)) {
+
+                                       /*
+                                        * Check if the thread is sleeping on a
+                                        * lock, otherwise skip the check.
+                                        * Drop the thread lock in order to
+                                        * avoid a LOR with the sleepqueue
+                                        * spinlock.
+                                        */
+                                       wchan = td->td_wchan;
+                                       tticks = ticks - td->td_slptick;
+                                       thread_unlock(td);
+                                       slptype = sleepq_type(wchan);
+                                       if ((slptype == SLEEPQ_SX ||
+                                           slptype == SLEEPQ_LK) &&
+                                           tticks > slpticks) {
+
+                                               /*
+                                                * Accordingly with provided
+                                                * thresholds, this thread is
+                                                * stuck for too long on a
+                                                * sleepqueue.
+                                                */
+                                               PROC_UNLOCK(p);
+                                               sx_sunlock(&allproc_lock);
+       panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
+                                                   __func__, td, tticks);
+                                       }
+                               } else
+                                       thread_unlock(td);
+                       }
+                       PROC_UNLOCK(p);
+               }
+               sx_sunlock(&allproc_lock);
+
+               /* Sleep for sleepfreq seconds. */
+               pause("deadlkres", sleepfreq * hz);
+       }
+}
+
+static struct kthread_desc deadlkres_kd = {
+       "deadlkres",
+       deadlkres,
+       (struct thread **)NULL
+};
+
+SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
+
+SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
+    &slptime_threshold, 0,
+    "Number of seconds within is valid to sleep on a sleepqueue");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
+    &blktime_threshold, 0,
+    "Number of seconds within is valid to block on a turnstile");
+SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
+    "Number of seconds between any deadlock resolver thread run");
+#endif /* DEADLKRES */
+
 void
 read_cpu_time(long *cp_time)
 {

Modified: head/sys/kern/subr_sleepqueue.c
==============================================================================
--- head/sys/kern/subr_sleepqueue.c     Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/kern/subr_sleepqueue.c     Sat Jan  9 01:46:38 2010        (r201879)
@@ -122,8 +122,8 @@ struct sleepqueue {
        LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
        LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
        void    *sq_wchan;                      /* (c) Wait channel. */
-#ifdef INVARIANTS
        int     sq_type;                        /* (c) Queue type. */
+#ifdef INVARIANTS
        struct lock_object *sq_lock;            /* (c) Associated lock. */
 #endif
 };
@@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_obje
                    ("thread's sleep queue has a non-empty free list"));
                KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
                sq->sq_lock = lock;
-               sq->sq_type = flags & SLEEPQ_TYPE;
 #endif
 #ifdef SLEEPQUEUE_PROFILING
                sc->sc_depth++;
@@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_obje
                sq = td->td_sleepqueue;
                LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
                sq->sq_wchan = wchan;
+               sq->sq_type = flags & SLEEPQ_TYPE;
        } else {
                MPASS(wchan == sq->sq_wchan);
                MPASS(lock == sq->sq_lock);
@@ -669,6 +669,28 @@ sleepq_timedwait_sig(void *wchan, int pr
 }
 
 /*
+ * Returns the type of sleepqueue given a waitchannel.
+ */
+int
+sleepq_type(void *wchan)
+{
+       struct sleepqueue *sq;
+       int type;
+
+       MPASS(wchan != NULL);
+
+       sleepq_lock(wchan);
+       sq = sleepq_lookup(wchan);
+       if (sq == NULL) {
+               sleepq_release(wchan);
+               return (-1);
+       }
+       type = sq->sq_type;
+       sleepq_release(wchan);
+       return (type);
+}
+
+/*
  * Removes a thread from a sleep queue and makes it
  * runnable.
  */
@@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepque
        return;
 found:
        db_printf("Wait channel: %p\n", sq->sq_wchan);
-#ifdef INVARIANTS
        db_printf("Queue type: %d\n", sq->sq_type);
+#ifdef INVARIANTS
        if (sq->sq_lock) {
                lock = sq->sq_lock;
                db_printf("Associated Interlock: %p - (%s) %s\n", lock,

Modified: head/sys/kern/subr_turnstile.c
==============================================================================
--- head/sys/kern/subr_turnstile.c      Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/kern/subr_turnstile.c      Sat Jan  9 01:46:38 2010        (r201879)
@@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, str
        td->td_tsqueue = queue;
        td->td_blocked = ts;
        td->td_lockname = lock->lo_name;
+       td->td_blktick = ticks;
        TD_SET_LOCK(td);
        mtx_unlock_spin(&tc->tc_lock);
        propagate_priority(td);
@@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, i
                MPASS(TD_CAN_RUN(td));
                td->td_blocked = NULL;
                td->td_lockname = NULL;
+               td->td_blktick = 0;
 #ifdef INVARIANTS
                td->td_tsqueue = 0xff;
 #endif

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/sys/proc.h Sat Jan  9 01:46:38 2010        (r201879)
@@ -218,6 +218,7 @@ struct thread {
        struct ucred    *td_ucred;      /* (k) Reference to credentials. */
        u_int           td_estcpu;      /* (t) estimated cpu utilization */
        int             td_slptick;     /* (t) Time at sleep. */
+       int             td_blktick;     /* (t) Time spent blocked. */
        struct rusage   td_ru;          /* (t) rusage information */
        uint64_t        td_incruntime;  /* (t) Cpu ticks to transfer to proc. */
        uint64_t        td_runtime;     /* (t) How many cpu ticks we've run. */

Modified: head/sys/sys/sleepqueue.h
==============================================================================
--- head/sys/sys/sleepqueue.h   Sat Jan  9 01:20:01 2010        (r201878)
+++ head/sys/sys/sleepqueue.h   Sat Jan  9 01:46:38 2010        (r201879)
@@ -112,6 +112,7 @@ void        sleepq_set_timeout(void *wchan, int
 u_int  sleepq_sleepcnt(void *wchan, int queue);
 int    sleepq_timedwait(void *wchan, int pri);
 int    sleepq_timedwait_sig(void *wchan, int pri);
+int    sleepq_type(void *wchan);
 void   sleepq_wait(void *wchan, int pri);
 int    sleepq_wait_sig(void *wchan, int pri);
 
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to