On Thu, Sep 23, 2010 at 10:49:01PM -0700, Matthew Dempsky wrote:
> I'd like to commit this. I've received positive reports from a few
> amd64 users and an i386 and softraid user, and all of the locking bugs
> exposed so far have already been fixed.
>
> I plan to remove the "#define panic()" hacks and let future locking
> problems actually panic; if anyone thinks they should stay in for a
> bit longer, let me know.
>
> ok?
ok krw@
.... Ken
>
>
> On Mon, Sep 20, 2010 at 08:00:27PM -0700, Matthew Dempsky wrote:
> > I'd really appreciate if people could test the diffs below and report
> > back to me. This adds some warnings about lock API misuse that has
> > already caught two bugs in DRM. Eventually, I want these to be panics
> > instead, but using just warnings for now should make it easier for
> > people to test.
> >
> > Please apply the diff below to an up-to-date -current kernel and check
> > dmesg for any stack backtraces. If you see any, please send them to
> > me.
> >
> > I'd especially like reports from both i386 and amd64, and also from
> > softraid users.
> >
> >
> > More details on the diff: this adds per-processor mutex lock counting
> > to i386 and amd64, and updates assertwaitok() to check that this value
> > is 0. They also now check that the process that calls mtx_leave() is
> > the same that called mtx_enter(). rw_exit*() are changed to call
> > rw_assert_{rd,wr}lock() as appropriate. Finally, mi_switch() also
> > calls assertwaitok() now.
> >
> >
> > Index: kern/subr_xxx.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/subr_xxx.c,v
> > retrieving revision 1.10
> > diff -u -p -r1.10 subr_xxx.c
> > --- kern/subr_xxx.c 21 Sep 2010 01:09:10 -0000 1.10
> > +++ kern/subr_xxx.c 21 Sep 2010 01:45:59 -0000
> > @@ -153,6 +153,9 @@ blktochr(dev_t dev)
> > return (NODEV);
> > }
> >
> > +#include <ddb/db_output.h>
> > +#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
> > +
> > /*
> > * Check that we're in a context where it's okay to sleep.
> > */
> > @@ -160,4 +163,9 @@ void
> > assertwaitok(void)
> > {
> > splassert(IPL_NONE);
> > +#ifdef __HAVE_CPU_MUTEX_LEVEL
> > + if (curcpu()->ci_mutex_level != 0)
> > + panic("assertwaitok: non-zero mutex count: %d",
> > + curcpu()->ci_mutex_level);
> > +#endif
> > }
> > Index: kern/kern_rwlock.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> > retrieving revision 1.15
> > diff -u -p -r1.15 kern_rwlock.c
> > --- kern/kern_rwlock.c 13 Aug 2009 23:12:15 -0000 1.15
> > +++ kern/kern_rwlock.c 21 Sep 2010 01:45:59 -0000
> > @@ -107,6 +107,8 @@ rw_exit_read(struct rwlock *rwl)
> > {
> > unsigned long owner = rwl->rwl_owner;
> >
> > + rw_assert_rdlock(rwl);
> > +
> > if (__predict_false((owner & RWLOCK_WAIT) ||
> > rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
> > rw_exit(rwl);
> > @@ -117,6 +119,8 @@ rw_exit_write(struct rwlock *rwl)
> > {
> > unsigned long owner = rwl->rwl_owner;
> >
> > + rw_assert_wrlock(rwl);
> > +
> > if (__predict_false((owner & RWLOCK_WAIT) ||
> > rw_cas(&rwl->rwl_owner, owner, 0)))
> > rw_exit(rwl);
> > @@ -236,6 +240,11 @@ rw_exit(struct rwlock *rwl)
> > int wrlock = owner & RWLOCK_WRLOCK;
> > unsigned long set;
> >
> > + if (wrlock)
> > + rw_assert_wrlock(rwl);
> > + else
> > + rw_assert_rdlock(rwl);
> > +
> > do {
> > owner = rwl->rwl_owner;
> > if (wrlock)
> > @@ -249,6 +258,10 @@ rw_exit(struct rwlock *rwl)
> > wakeup(rwl);
> > }
> >
> > +#include <ddb/db_output.h>
> > +#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
> > +
> > +#ifdef DIAGNOSTIC
> > void
> > rw_assert_wrlock(struct rwlock *rwl)
> > {
> > @@ -272,3 +285,4 @@ rw_assert_unlocked(struct rwlock *rwl)
> > if (rwl->rwl_owner != 0L)
> > panic("%s: lock held", rwl->rwl_name);
> > }
> > +#endif
> > Index: kern/sched_bsd.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> > retrieving revision 1.23
> > diff -u -p -r1.23 sched_bsd.c
> > --- kern/sched_bsd.c 30 Jun 2010 22:38:17 -0000 1.23
> > +++ kern/sched_bsd.c 21 Sep 2010 01:45:59 -0000
> > @@ -356,6 +356,7 @@ mi_switch(void)
> > int sched_count;
> > #endif
> >
> > + assertwaitok();
> > KASSERT(p->p_stat != SONPROC);
> >
> > SCHED_ASSERT_LOCKED();
> > Index: arch/amd64/include/cpu.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> > retrieving revision 1.55
> > diff -u -p -r1.55 cpu.h
> > --- arch/amd64/include/cpu.h 11 Aug 2010 21:22:44 -0000 1.55
> > +++ arch/amd64/include/cpu.h 21 Sep 2010 01:46:00 -0000
> > @@ -87,6 +87,10 @@ struct cpu_info {
> > int ci_idepth;
> > u_int32_t ci_imask[NIPL];
> > u_int32_t ci_iunmask[NIPL];
> > +#ifdef DIAGNOSTIC
> > + int ci_mutex_level;
> > +#define __HAVE_CPU_MUTEX_LEVEL
> > +#endif
> >
> > u_int ci_flags;
> > u_int32_t ci_ipis;
> > Index: arch/amd64/amd64/genassym.cf
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
> > retrieving revision 1.22
> > diff -u -p -r1.22 genassym.cf
> > --- arch/amd64/amd64/genassym.cf 1 Apr 2010 19:47:59 -0000 1.22
> > +++ arch/amd64/amd64/genassym.cf 21 Sep 2010 01:46:00 -0000
> > @@ -105,6 +105,9 @@ member CPU_INFO_IDEPTH ci_idepth
> > member CPU_INFO_ISOURCES ci_isources
> > member CPU_INFO_IPENDING ci_ipending
> > member CPU_INFO_IUNMASK ci_iunmask
> > +ifdef DIAGNOSTIC
> > +member CPU_INFO_MUTEX_LEVEL ci_mutex_level
> > +endif
> > member CPU_INFO_GDT ci_gdt
> >
> > struct intrsource
> > Index: arch/amd64/amd64/mutex.S
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/mutex.S,v
> > retrieving revision 1.7
> > diff -u -p -r1.7 mutex.S
> > --- arch/amd64/amd64/mutex.S 13 Aug 2009 13:24:55 -0000 1.7
> > +++ arch/amd64/amd64/mutex.S 21 Sep 2010 01:46:00 -0000
> > @@ -62,6 +62,9 @@ ENTRY(mtx_enter)
> > cmpxchgq %rcx, MTX_OWNER(%rdi) # test_and_set(mtx->mtx_owner)
> > jne 2f
> > movl %edx, MTX_OLDIPL(%rdi)
> > +#ifdef DIAGNOSTIC
> > + incl CPU_INFO_MUTEX_LEVEL(%rcx)
> > +#endif
> > ret
> >
> > /* We failed to obtain the lock. splx, spin and retry. */
> > @@ -103,6 +106,9 @@ ENTRY(mtx_enter_try)
> > cmpxchgq %rcx, MTX_OWNER(%rdi) # test_and_set(mtx->mtx_owner)
> > jne 2f
> > movl %edx, MTX_OLDIPL(%rdi)
> > +#ifdef DIAGNOSTIC
> > + incl CPU_INFO_MUTEX_LEVEL(%rcx)
> > +#endif
> > movq $1, %rax
> > ret
> >
> > @@ -122,12 +128,18 @@ ENTRY(mtx_enter_try)
> > #ifdef DIAGNOSTIC
> > 3: movq $4f, %rdi
> > call _C_LABEL(panic)
> > -4: .asciz "mtx_enter: locking against myself"
> > +4: .asciz "mtx_enter_try: locking against myself"
> > #endif
> >
> >
> > ENTRY(mtx_leave)
> > movq %rdi, %rax
> > +#ifdef DIAGNOSTIC
> > + movq CPUVAR(SELF), %rcx
> > + cmpq MTX_OWNER(%rax), %rcx
> > + jne 2f
> > + decl CPU_INFO_MUTEX_LEVEL(%rcx)
> > +#endif
> > xorq %rcx, %rcx
> > movl MTX_OLDIPL(%rax), %edi
> > movl %ecx, MTX_OLDIPL(%rax)
> > @@ -137,3 +149,9 @@ ENTRY(mtx_leave)
> > call _C_LABEL(spllower)
> > 1:
> > ret
> > +
> > +#ifdef DIAGNOSTIC
> > +2: movq $3f, %rdi
> > + call _C_LABEL(panic)
> > +3: .asciz "mtx_leave: lock not held"
> > +#endif
> > Index: arch/i386/include/cpu.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/i386/include/cpu.h,v
> > retrieving revision 1.114
> > diff -u -p -r1.114 cpu.h
> > --- arch/i386/include/cpu.h 11 Aug 2010 21:22:44 -0000 1.114
> > +++ arch/i386/include/cpu.h 21 Sep 2010 01:46:00 -0000
> > @@ -106,6 +106,10 @@ struct cpu_info {
> > int ci_idepth;
> > u_int32_t ci_imask[NIPL];
> > u_int32_t ci_iunmask[NIPL];
> > +#ifdef DIAGNOSTIC
> > + int ci_mutex_level;
> > +#define __HAVE_CPU_MUTEX_LEVEL
> > +#endif
> >
> > paddr_t ci_idle_pcb_paddr; /* PA of idle PCB */
> > u_long ci_flags; /* flags; see below */
> > Index: arch/i386/i386/genassym.cf
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/i386/i386/genassym.cf,v
> > retrieving revision 1.31
> > diff -u -p -r1.31 genassym.cf
> > --- arch/i386/i386/genassym.cf 3 Jul 2010 04:54:32 -0000 1.31
> > +++ arch/i386/i386/genassym.cf 21 Sep 2010 01:46:00 -0000
> > @@ -207,6 +207,9 @@ define CPU_INFO_IUNMASK offsetof(struct
> > define CPU_INFO_ILEVEL offsetof(struct cpu_info, ci_ilevel)
> > define CPU_INFO_IDEPTH offsetof(struct cpu_info, ci_idepth)
> > define CPU_INFO_ISOURCES offsetof(struct cpu_info, ci_isources)
> > +ifdef DIAGNOSTIC
> > +define CPU_INFO_MUTEX_LEVEL offsetof(struct cpu_info,
> > ci_mutex_level)
> > +endif
> > define CPU_INFO_CURPMAP offsetof(struct cpu_info, ci_curpmap)
> >
> > struct pmap
> > Index: arch/i386/i386/mutex.S
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/i386/i386/mutex.S,v
> > retrieving revision 1.7
> > diff -u -p -r1.7 mutex.S
> > --- arch/i386/i386/mutex.S 13 Aug 2009 13:24:55 -0000 1.7
> > +++ arch/i386/i386/mutex.S 21 Sep 2010 01:46:00 -0000
> > @@ -63,6 +63,9 @@ ENTRY(mtx_enter)
> > testl %eax, %eax # if (already held)
> > jnz 3f
> > movl CPUVAR(SELF), %eax
> > +#ifdef DIAGNOSTIC
> > + incl CPU_INFO_MUTEX_LEVEL(%eax)
> > +#endif
> > movl %eax, MTX_OWNER(%ecx)
> > movl %edx, MTX_OLDIPL(%ecx)
> > leave
> > @@ -107,6 +110,9 @@ ENTRY(mtx_enter_try)
> > testl %eax, %eax # if (already held)
> > jnz 3f
> > movl CPUVAR(SELF), %eax
> > +#ifdef DIAGNOSTIC
> > + incl CPU_INFO_MUTEX_LEVEL(%eax)
> > +#endif
> > movl %eax, MTX_OWNER(%ecx)
> > movl %edx, MTX_OLDIPL(%ecx)
> > movl $1, %eax
> > @@ -130,7 +136,7 @@ ENTRY(mtx_enter_try)
> > #ifdef DIAGNOSTIC
> > 4: pushl $5f
> > call _C_LABEL(panic)
> > -5: .asciz "mtx_enter: locking against myself"
> > +5: .asciz "mtx_enter_try: locking against myself"
> > #endif
> >
> >
> > @@ -138,6 +144,12 @@ ENTRY(mtx_leave)
> > pushl %ebp
> > movl %esp, %ebp
> > movl SOFF(%ebp), %ecx
> > +#ifdef DIAGNOSTIC
> > + movl CPUVAR(SELF), %eax
> > + cmpl %eax, MTX_OWNER(%ecx)
> > + jne 1f
> > + decl CPU_INFO_MUTEX_LEVEL(%eax)
> > +#endif
> > xorl %eax, %eax
> > movl %eax, MTX_OWNER(%ecx)
> > pushl MTX_OLDIPL(%ecx)
> > @@ -146,3 +158,9 @@ ENTRY(mtx_leave)
> > call _C_LABEL(splx)
> > leave
> > ret
> > +
> > +#ifdef DIAGNOSTIC
> > +1: pushl $2f
> > + call _C_LABEL(panic)
> > +2: .asciz "mtx_leave: lock not held"
> > +#endif
> > Index: sys/rwlock.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/rwlock.h,v
> > retrieving revision 1.12
> > diff -u -p -r1.12 rwlock.h
> > --- sys/rwlock.h 13 Aug 2009 21:22:29 -0000 1.12
> > +++ sys/rwlock.h 21 Sep 2010 01:46:00 -0000
> > @@ -95,9 +95,15 @@ void rw_enter_write(struct rwlock *);
> > void rw_exit_read(struct rwlock *);
> > void rw_exit_write(struct rwlock *);
> >
> > +#ifdef DIAGNOSTIC
> > void rw_assert_wrlock(struct rwlock *);
> > void rw_assert_rdlock(struct rwlock *);
> > void rw_assert_unlocked(struct rwlock *);
> > +#else
> > +#define rw_assert_wrlock(rwl) ((void)0)
> > +#define rw_assert_rdlock(rwl) ((void)0)
> > +#define rw_assert_unlocked(rwl) ((void)0)
> > +#endif
> >
> > int rw_enter(struct rwlock *, int);
> > void rw_exit(struct rwlock *);