I'd really appreciate if people could test the diffs below and report
back to me.  This adds some warnings about lock API misuse that has
already caught two bugs in DRM.  Eventually, I want these to be panics
instead, but using just warnings for now should make it easier for
people to test.

Please apply the diff below to an up-to-date -current kernel and check
dmesg for any stack backtraces.  If you see any, please send them to
me.

I'd especially like reports from both i386 and amd64, and also from
softraid users.


More details on the diff: this adds per-processor mutex lock counting
to i386 and amd64, and updates assertwaitok() to check that this value
is 0.  They also now check that the process that calls mtx_leave() is
the same that called mtx_enter().  rw_exit*() are changed to call
rw_assert_{rd,wr}lock() as appropriate.  Finally, mi_switch() also
calls assertwaitok() now.


Index: kern/subr_xxx.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_xxx.c,v
retrieving revision 1.10
diff -u -p -r1.10 subr_xxx.c
--- kern/subr_xxx.c     21 Sep 2010 01:09:10 -0000      1.10
+++ kern/subr_xxx.c     21 Sep 2010 01:45:59 -0000
@@ -153,6 +153,9 @@ blktochr(dev_t dev)
        return (NODEV);
 }
 
+#include <ddb/db_output.h>
+#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
+
 /*
  * Check that we're in a context where it's okay to sleep.
  */
@@ -160,4 +163,9 @@ void
 assertwaitok(void)
 {
        splassert(IPL_NONE);
+#ifdef __HAVE_CPU_MUTEX_LEVEL
+       if (curcpu()->ci_mutex_level != 0)
+               panic("assertwaitok: non-zero mutex count: %d",
+                   curcpu()->ci_mutex_level);
+#endif
 }
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.15
diff -u -p -r1.15 kern_rwlock.c
--- kern/kern_rwlock.c  13 Aug 2009 23:12:15 -0000      1.15
+++ kern/kern_rwlock.c  21 Sep 2010 01:45:59 -0000
@@ -107,6 +107,8 @@ rw_exit_read(struct rwlock *rwl)
 {
        unsigned long owner = rwl->rwl_owner;
 
+       rw_assert_rdlock(rwl);
+
        if (__predict_false((owner & RWLOCK_WAIT) ||
            rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
                rw_exit(rwl);
@@ -117,6 +119,8 @@ rw_exit_write(struct rwlock *rwl)
 {
        unsigned long owner = rwl->rwl_owner;
 
+       rw_assert_wrlock(rwl);
+
        if (__predict_false((owner & RWLOCK_WAIT) ||
            rw_cas(&rwl->rwl_owner, owner, 0)))
                rw_exit(rwl);
@@ -236,6 +240,11 @@ rw_exit(struct rwlock *rwl)
        int wrlock = owner & RWLOCK_WRLOCK;
        unsigned long set;
 
+       if (wrlock)
+               rw_assert_wrlock(rwl);
+       else
+               rw_assert_rdlock(rwl);
+
        do {
                owner = rwl->rwl_owner;
                if (wrlock)
@@ -249,6 +258,10 @@ rw_exit(struct rwlock *rwl)
                wakeup(rwl);
 }
 
+#include <ddb/db_output.h>
+#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
+
+#ifdef DIAGNOSTIC
 void
 rw_assert_wrlock(struct rwlock *rwl)
 {
@@ -272,3 +285,4 @@ rw_assert_unlocked(struct rwlock *rwl)
        if (rwl->rwl_owner != 0L)
                panic("%s: lock held", rwl->rwl_name);
 }
+#endif
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.23
diff -u -p -r1.23 sched_bsd.c
--- kern/sched_bsd.c    30 Jun 2010 22:38:17 -0000      1.23
+++ kern/sched_bsd.c    21 Sep 2010 01:45:59 -0000
@@ -356,6 +356,7 @@ mi_switch(void)
        int sched_count;
 #endif
 
+       assertwaitok();
        KASSERT(p->p_stat != SONPROC);
 
        SCHED_ASSERT_LOCKED();
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.55
diff -u -p -r1.55 cpu.h
--- arch/amd64/include/cpu.h    11 Aug 2010 21:22:44 -0000      1.55
+++ arch/amd64/include/cpu.h    21 Sep 2010 01:46:00 -0000
@@ -87,6 +87,10 @@ struct cpu_info {
        int             ci_idepth;
        u_int32_t       ci_imask[NIPL];
        u_int32_t       ci_iunmask[NIPL];
+#ifdef DIAGNOSTIC
+       int             ci_mutex_level;
+#define __HAVE_CPU_MUTEX_LEVEL
+#endif
 
        u_int           ci_flags;
        u_int32_t       ci_ipis;
Index: arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.22
diff -u -p -r1.22 genassym.cf
--- arch/amd64/amd64/genassym.cf        1 Apr 2010 19:47:59 -0000       1.22
+++ arch/amd64/amd64/genassym.cf        21 Sep 2010 01:46:00 -0000
@@ -105,6 +105,9 @@ member      CPU_INFO_IDEPTH         ci_idepth
 member CPU_INFO_ISOURCES       ci_isources
 member CPU_INFO_IPENDING       ci_ipending
 member CPU_INFO_IUNMASK        ci_iunmask
+ifdef DIAGNOSTIC
+member CPU_INFO_MUTEX_LEVEL    ci_mutex_level
+endif
 member CPU_INFO_GDT            ci_gdt
 
 struct intrsource
Index: arch/amd64/amd64/mutex.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/mutex.S,v
retrieving revision 1.7
diff -u -p -r1.7 mutex.S
--- arch/amd64/amd64/mutex.S    13 Aug 2009 13:24:55 -0000      1.7
+++ arch/amd64/amd64/mutex.S    21 Sep 2010 01:46:00 -0000
@@ -62,6 +62,9 @@ ENTRY(mtx_enter)
        cmpxchgq        %rcx, MTX_OWNER(%rdi)   # test_and_set(mtx->mtx_owner)
        jne     2f
        movl    %edx, MTX_OLDIPL(%rdi)
+#ifdef DIAGNOSTIC
+       incl    CPU_INFO_MUTEX_LEVEL(%rcx)
+#endif
        ret
 
        /* We failed to obtain the lock. splx, spin and retry. */
@@ -103,6 +106,9 @@ ENTRY(mtx_enter_try)
        cmpxchgq        %rcx, MTX_OWNER(%rdi)   # test_and_set(mtx->mtx_owner)
        jne     2f
        movl    %edx, MTX_OLDIPL(%rdi)
+#ifdef DIAGNOSTIC
+       incl    CPU_INFO_MUTEX_LEVEL(%rcx)
+#endif
        movq    $1, %rax
        ret
 
@@ -122,12 +128,18 @@ ENTRY(mtx_enter_try)
 #ifdef DIAGNOSTIC
 3:     movq    $4f, %rdi
        call    _C_LABEL(panic)
-4:     .asciz  "mtx_enter: locking against myself"
+4:     .asciz  "mtx_enter_try: locking against myself"
 #endif
 
 
 ENTRY(mtx_leave)
        movq    %rdi, %rax
+#ifdef DIAGNOSTIC
+       movq    CPUVAR(SELF), %rcx
+       cmpq    MTX_OWNER(%rax), %rcx
+       jne     2f
+       decl    CPU_INFO_MUTEX_LEVEL(%rcx)
+#endif
        xorq    %rcx, %rcx
        movl    MTX_OLDIPL(%rax), %edi
        movl    %ecx, MTX_OLDIPL(%rax)
@@ -137,3 +149,9 @@ ENTRY(mtx_leave)
        call    _C_LABEL(spllower)
 1:
        ret
+
+#ifdef DIAGNOSTIC
+2:     movq    $3f, %rdi
+       call    _C_LABEL(panic)
+3:     .asciz  "mtx_leave: lock not held"
+#endif
Index: arch/i386/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/i386/include/cpu.h,v
retrieving revision 1.114
diff -u -p -r1.114 cpu.h
--- arch/i386/include/cpu.h     11 Aug 2010 21:22:44 -0000      1.114
+++ arch/i386/include/cpu.h     21 Sep 2010 01:46:00 -0000
@@ -106,6 +106,10 @@ struct cpu_info {
        int             ci_idepth;
        u_int32_t       ci_imask[NIPL];
        u_int32_t       ci_iunmask[NIPL];
+#ifdef DIAGNOSTIC
+       int             ci_mutex_level;
+#define __HAVE_CPU_MUTEX_LEVEL
+#endif
 
        paddr_t         ci_idle_pcb_paddr; /* PA of idle PCB */
        u_long          ci_flags;       /* flags; see below */
Index: arch/i386/i386/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/genassym.cf,v
retrieving revision 1.31
diff -u -p -r1.31 genassym.cf
--- arch/i386/i386/genassym.cf  3 Jul 2010 04:54:32 -0000       1.31
+++ arch/i386/i386/genassym.cf  21 Sep 2010 01:46:00 -0000
@@ -207,6 +207,9 @@ define      CPU_INFO_IUNMASK        offsetof(struct 
 define CPU_INFO_ILEVEL         offsetof(struct cpu_info, ci_ilevel)
 define CPU_INFO_IDEPTH         offsetof(struct cpu_info, ci_idepth)
 define CPU_INFO_ISOURCES       offsetof(struct cpu_info, ci_isources)
+ifdef DIAGNOSTIC
+define CPU_INFO_MUTEX_LEVEL    offsetof(struct cpu_info, ci_mutex_level)
+endif
 define CPU_INFO_CURPMAP        offsetof(struct cpu_info, ci_curpmap)
 
 struct pmap
Index: arch/i386/i386/mutex.S
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/mutex.S,v
retrieving revision 1.7
diff -u -p -r1.7 mutex.S
--- arch/i386/i386/mutex.S      13 Aug 2009 13:24:55 -0000      1.7
+++ arch/i386/i386/mutex.S      21 Sep 2010 01:46:00 -0000
@@ -63,6 +63,9 @@ ENTRY(mtx_enter)
        testl   %eax, %eax              # if (already held)
        jnz     3f
        movl    CPUVAR(SELF), %eax
+#ifdef DIAGNOSTIC
+       incl    CPU_INFO_MUTEX_LEVEL(%eax)
+#endif
        movl    %eax, MTX_OWNER(%ecx)
        movl    %edx, MTX_OLDIPL(%ecx)
        leave
@@ -107,6 +110,9 @@ ENTRY(mtx_enter_try)
        testl   %eax, %eax              # if (already held)
        jnz     3f
        movl    CPUVAR(SELF), %eax
+#ifdef DIAGNOSTIC
+       incl    CPU_INFO_MUTEX_LEVEL(%eax)
+#endif
        movl    %eax, MTX_OWNER(%ecx)
        movl    %edx, MTX_OLDIPL(%ecx)
        movl    $1, %eax
@@ -130,7 +136,7 @@ ENTRY(mtx_enter_try)
 #ifdef DIAGNOSTIC
 4:     pushl   $5f
        call    _C_LABEL(panic)
-5:     .asciz  "mtx_enter: locking against myself"
+5:     .asciz  "mtx_enter_try: locking against myself"
 #endif
 
        
@@ -138,6 +144,12 @@ ENTRY(mtx_leave)
        pushl   %ebp
        movl    %esp, %ebp
        movl    SOFF(%ebp), %ecx
+#ifdef DIAGNOSTIC
+       movl    CPUVAR(SELF), %eax
+       cmpl    %eax, MTX_OWNER(%ecx)
+       jne     1f
+       decl    CPU_INFO_MUTEX_LEVEL(%eax)
+#endif
        xorl    %eax, %eax
        movl    %eax, MTX_OWNER(%ecx)
        pushl   MTX_OLDIPL(%ecx)
@@ -146,3 +158,9 @@ ENTRY(mtx_leave)
        call    _C_LABEL(splx)
        leave
        ret
+
+#ifdef DIAGNOSTIC
+1:     pushl   $2f
+       call    _C_LABEL(panic)
+2:     .asciz  "mtx_leave: lock not held"
+#endif
Index: sys/rwlock.h
===================================================================
RCS file: /cvs/src/sys/sys/rwlock.h,v
retrieving revision 1.12
diff -u -p -r1.12 rwlock.h
--- sys/rwlock.h        13 Aug 2009 21:22:29 -0000      1.12
+++ sys/rwlock.h        21 Sep 2010 01:46:00 -0000
@@ -95,9 +95,15 @@ void rw_enter_write(struct rwlock *);
 void rw_exit_read(struct rwlock *);
 void rw_exit_write(struct rwlock *);
 
+#ifdef DIAGNOSTIC
 void rw_assert_wrlock(struct rwlock *);
 void rw_assert_rdlock(struct rwlock *);
 void rw_assert_unlocked(struct rwlock *);
+#else
+#define rw_assert_wrlock(rwl) ((void)0)
+#define rw_assert_rdlock(rwl) ((void)0)
+#define rw_assert_unlocked(rwl) ((void)0)
+#endif
 
 int rw_enter(struct rwlock *, int);
 void rw_exit(struct rwlock *);

Reply via email to