I'd really appreciate it if people could test the diff below and report
back to me. This adds some warnings about lock API misuse that has
already caught two bugs in DRM. Eventually, I want these to be panics
instead, but using just warnings for now should make it easier for
people to test.
Please apply the diff below to an up-to-date -current kernel and check
dmesg for any stack backtraces. If you see any, please send them to
me.
I'd especially like reports from both i386 and amd64, and also from
softraid users.
More details on the diff: this adds per-processor mutex lock counting
to i386 and amd64, and updates assertwaitok() to check that this value
is 0. The mutex code also now checks that the CPU that calls mtx_leave()
is the same one that called mtx_enter(). rw_exit*() are changed to call
rw_assert_{rd,wr}lock() as appropriate. Finally, mi_switch() also
calls assertwaitok() now.
Index: kern/subr_xxx.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_xxx.c,v
retrieving revision 1.10
diff -u -p -r1.10 subr_xxx.c
--- kern/subr_xxx.c 21 Sep 2010 01:09:10 -0000 1.10
+++ kern/subr_xxx.c 21 Sep 2010 01:45:59 -0000
@@ -153,6 +153,9 @@ blktochr(dev_t dev)
return (NODEV);
}
+#include <ddb/db_output.h>
+#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
+
/*
* Check that we're in a context where it's okay to sleep.
*/
@@ -160,4 +163,9 @@ void
assertwaitok(void)
{
splassert(IPL_NONE);
+#ifdef __HAVE_CPU_MUTEX_LEVEL
+ if (curcpu()->ci_mutex_level != 0)
+ panic("assertwaitok: non-zero mutex count: %d",
+ curcpu()->ci_mutex_level);
+#endif
}
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.15
diff -u -p -r1.15 kern_rwlock.c
--- kern/kern_rwlock.c 13 Aug 2009 23:12:15 -0000 1.15
+++ kern/kern_rwlock.c 21 Sep 2010 01:45:59 -0000
@@ -107,6 +107,8 @@ rw_exit_read(struct rwlock *rwl)
{
unsigned long owner = rwl->rwl_owner;
+ rw_assert_rdlock(rwl);
+
if (__predict_false((owner & RWLOCK_WAIT) ||
rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
rw_exit(rwl);
@@ -117,6 +119,8 @@ rw_exit_write(struct rwlock *rwl)
{
unsigned long owner = rwl->rwl_owner;
+ rw_assert_wrlock(rwl);
+
if (__predict_false((owner & RWLOCK_WAIT) ||
rw_cas(&rwl->rwl_owner, owner, 0)))
rw_exit(rwl);
@@ -236,6 +240,11 @@ rw_exit(struct rwlock *rwl)
int wrlock = owner & RWLOCK_WRLOCK;
unsigned long set;
+ if (wrlock)
+ rw_assert_wrlock(rwl);
+ else
+ rw_assert_rdlock(rwl);
+
do {
owner = rwl->rwl_owner;
if (wrlock)
@@ -249,6 +258,10 @@ rw_exit(struct rwlock *rwl)
wakeup(rwl);
}
+#include <ddb/db_output.h>
+#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
+
+#ifdef DIAGNOSTIC
void
rw_assert_wrlock(struct rwlock *rwl)
{
@@ -272,3 +285,4 @@ rw_assert_unlocked(struct rwlock *rwl)
if (rwl->rwl_owner != 0L)
panic("%s: lock held", rwl->rwl_name);
}
+#endif
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.23
diff -u -p -r1.23 sched_bsd.c
--- kern/sched_bsd.c 30 Jun 2010 22:38:17 -0000 1.23
+++ kern/sched_bsd.c 21 Sep 2010 01:45:59 -0000
@@ -356,6 +356,7 @@ mi_switch(void)
int sched_count;
#endif
+ assertwaitok();
KASSERT(p->p_stat != SONPROC);
SCHED_ASSERT_LOCKED();
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.55
diff -u -p -r1.55 cpu.h
--- arch/amd64/include/cpu.h 11 Aug 2010 21:22:44 -0000 1.55
+++ arch/amd64/include/cpu.h 21 Sep 2010 01:46:00 -0000
@@ -87,6 +87,10 @@ struct cpu_info {
int ci_idepth;
u_int32_t ci_imask[NIPL];
u_int32_t ci_iunmask[NIPL];
+#ifdef DIAGNOSTIC
+ int ci_mutex_level;
+#define __HAVE_CPU_MUTEX_LEVEL
+#endif
u_int ci_flags;
u_int32_t ci_ipis;
Index: arch/amd64/amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.22
diff -u -p -r1.22 genassym.cf
--- arch/amd64/amd64/genassym.cf 1 Apr 2010 19:47:59 -0000 1.22
+++ arch/amd64/amd64/genassym.cf 21 Sep 2010 01:46:00 -0000
@@ -105,6 +105,9 @@ member CPU_INFO_IDEPTH ci_idepth
member CPU_INFO_ISOURCES ci_isources
member CPU_INFO_IPENDING ci_ipending
member CPU_INFO_IUNMASK ci_iunmask
+ifdef DIAGNOSTIC
+member CPU_INFO_MUTEX_LEVEL ci_mutex_level
+endif
member CPU_INFO_GDT ci_gdt
struct intrsource
Index: arch/amd64/amd64/mutex.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/mutex.S,v
retrieving revision 1.7
diff -u -p -r1.7 mutex.S
--- arch/amd64/amd64/mutex.S 13 Aug 2009 13:24:55 -0000 1.7
+++ arch/amd64/amd64/mutex.S 21 Sep 2010 01:46:00 -0000
@@ -62,6 +62,9 @@ ENTRY(mtx_enter)
cmpxchgq %rcx, MTX_OWNER(%rdi) # test_and_set(mtx->mtx_owner)
jne 2f
movl %edx, MTX_OLDIPL(%rdi)
+#ifdef DIAGNOSTIC
+ incl CPU_INFO_MUTEX_LEVEL(%rcx)
+#endif
ret
/* We failed to obtain the lock. splx, spin and retry. */
@@ -103,6 +106,9 @@ ENTRY(mtx_enter_try)
cmpxchgq %rcx, MTX_OWNER(%rdi) # test_and_set(mtx->mtx_owner)
jne 2f
movl %edx, MTX_OLDIPL(%rdi)
+#ifdef DIAGNOSTIC
+ incl CPU_INFO_MUTEX_LEVEL(%rcx)
+#endif
movq $1, %rax
ret
@@ -122,12 +128,18 @@ ENTRY(mtx_enter_try)
#ifdef DIAGNOSTIC
3: movq $4f, %rdi
call _C_LABEL(panic)
-4: .asciz "mtx_enter: locking against myself"
+4: .asciz "mtx_enter_try: locking against myself"
#endif
ENTRY(mtx_leave)
movq %rdi, %rax
+#ifdef DIAGNOSTIC
+ movq CPUVAR(SELF), %rcx
+ cmpq MTX_OWNER(%rax), %rcx
+ jne 2f
+ decl CPU_INFO_MUTEX_LEVEL(%rcx)
+#endif
xorq %rcx, %rcx
movl MTX_OLDIPL(%rax), %edi
movl %ecx, MTX_OLDIPL(%rax)
@@ -137,3 +149,9 @@ ENTRY(mtx_leave)
call _C_LABEL(spllower)
1:
ret
+
+#ifdef DIAGNOSTIC
+2: movq $3f, %rdi
+ call _C_LABEL(panic)
+3: .asciz "mtx_leave: lock not held"
+#endif
Index: arch/i386/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/i386/include/cpu.h,v
retrieving revision 1.114
diff -u -p -r1.114 cpu.h
--- arch/i386/include/cpu.h 11 Aug 2010 21:22:44 -0000 1.114
+++ arch/i386/include/cpu.h 21 Sep 2010 01:46:00 -0000
@@ -106,6 +106,10 @@ struct cpu_info {
int ci_idepth;
u_int32_t ci_imask[NIPL];
u_int32_t ci_iunmask[NIPL];
+#ifdef DIAGNOSTIC
+ int ci_mutex_level;
+#define __HAVE_CPU_MUTEX_LEVEL
+#endif
paddr_t ci_idle_pcb_paddr; /* PA of idle PCB */
u_long ci_flags; /* flags; see below */
Index: arch/i386/i386/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/genassym.cf,v
retrieving revision 1.31
diff -u -p -r1.31 genassym.cf
--- arch/i386/i386/genassym.cf 3 Jul 2010 04:54:32 -0000 1.31
+++ arch/i386/i386/genassym.cf 21 Sep 2010 01:46:00 -0000
@@ -207,6 +207,9 @@ define CPU_INFO_IUNMASK offsetof(struct
define CPU_INFO_ILEVEL offsetof(struct cpu_info, ci_ilevel)
define CPU_INFO_IDEPTH offsetof(struct cpu_info, ci_idepth)
define CPU_INFO_ISOURCES offsetof(struct cpu_info, ci_isources)
+ifdef DIAGNOSTIC
+define CPU_INFO_MUTEX_LEVEL offsetof(struct cpu_info, ci_mutex_level)
+endif
define CPU_INFO_CURPMAP offsetof(struct cpu_info, ci_curpmap)
struct pmap
Index: arch/i386/i386/mutex.S
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/mutex.S,v
retrieving revision 1.7
diff -u -p -r1.7 mutex.S
--- arch/i386/i386/mutex.S 13 Aug 2009 13:24:55 -0000 1.7
+++ arch/i386/i386/mutex.S 21 Sep 2010 01:46:00 -0000
@@ -63,6 +63,9 @@ ENTRY(mtx_enter)
testl %eax, %eax # if (already held)
jnz 3f
movl CPUVAR(SELF), %eax
+#ifdef DIAGNOSTIC
+ incl CPU_INFO_MUTEX_LEVEL(%eax)
+#endif
movl %eax, MTX_OWNER(%ecx)
movl %edx, MTX_OLDIPL(%ecx)
leave
@@ -107,6 +110,9 @@ ENTRY(mtx_enter_try)
testl %eax, %eax # if (already held)
jnz 3f
movl CPUVAR(SELF), %eax
+#ifdef DIAGNOSTIC
+ incl CPU_INFO_MUTEX_LEVEL(%eax)
+#endif
movl %eax, MTX_OWNER(%ecx)
movl %edx, MTX_OLDIPL(%ecx)
movl $1, %eax
@@ -130,7 +136,7 @@ ENTRY(mtx_enter_try)
#ifdef DIAGNOSTIC
4: pushl $5f
call _C_LABEL(panic)
-5: .asciz "mtx_enter: locking against myself"
+5: .asciz "mtx_enter_try: locking against myself"
#endif
@@ -138,6 +144,12 @@ ENTRY(mtx_leave)
pushl %ebp
movl %esp, %ebp
movl SOFF(%ebp), %ecx
+#ifdef DIAGNOSTIC
+ movl CPUVAR(SELF), %eax
+ cmpl %eax, MTX_OWNER(%ecx)
+ jne 1f
+ decl CPU_INFO_MUTEX_LEVEL(%eax)
+#endif
xorl %eax, %eax
movl %eax, MTX_OWNER(%ecx)
pushl MTX_OLDIPL(%ecx)
@@ -146,3 +158,9 @@ ENTRY(mtx_leave)
call _C_LABEL(splx)
leave
ret
+
+#ifdef DIAGNOSTIC
+1: pushl $2f
+ call _C_LABEL(panic)
+2: .asciz "mtx_leave: lock not held"
+#endif
Index: sys/rwlock.h
===================================================================
RCS file: /cvs/src/sys/sys/rwlock.h,v
retrieving revision 1.12
diff -u -p -r1.12 rwlock.h
--- sys/rwlock.h 13 Aug 2009 21:22:29 -0000 1.12
+++ sys/rwlock.h 21 Sep 2010 01:46:00 -0000
@@ -95,9 +95,15 @@ void rw_enter_write(struct rwlock *);
void rw_exit_read(struct rwlock *);
void rw_exit_write(struct rwlock *);
+#ifdef DIAGNOSTIC
void rw_assert_wrlock(struct rwlock *);
void rw_assert_rdlock(struct rwlock *);
void rw_assert_unlocked(struct rwlock *);
+#else
+#define rw_assert_wrlock(rwl) ((void)0)
+#define rw_assert_rdlock(rwl) ((void)0)
+#define rw_assert_unlocked(rwl) ((void)0)
+#endif
int rw_enter(struct rwlock *, int);
void rw_exit(struct rwlock *);