On Thu, Sep 23, 2010 at 10:49:01PM -0700, Matthew Dempsky wrote:
> I'd like to commit this.  I've received positive reports from a few
> amd64 users and an i386 and softraid user, and all of the locking bugs
> exposed so far have already been fixed.

It's already exposed bugs in limited testing, so casting a wider net
(i.e., getting it into snaps) with it makes sense to me.

It also reminds me that I at one point intended to figure out how to
support enforcing dependencies between mutexes...

> 
> I plan to remove the "#define panic()" hacks and let future locking
> problems actually panic; if anyone thinks they should stay in for a
> bit longer, let me know.
> 
> ok?
> 
> 
> On Mon, Sep 20, 2010 at 08:00:27PM -0700, Matthew Dempsky wrote:
> > I'd really appreciate if people could test the diffs below and report
> > back to me.  This adds some warnings about lock API misuse that has
> > already caught two bugs in DRM.  Eventually, I want these to be panics
> > instead, but using just warnings for now should make it easier for
> > people to test.
> > 
> > Please apply the diff below to an up-to-date -current kernel and check
> > dmesg for any stack backtraces.  If you see any, please send them to
> > me.
> > 
> > I'd especially like reports from both i386 and amd64, and also from
> > softraid users.
> > 
> > 
> > More details on the diff: this adds per-processor mutex lock counting
> > to i386 and amd64, and updates assertwaitok() to check that this value
> > is 0.  They also now check that the process that calls mtx_leave() is
> > the same one that called mtx_enter().  rw_exit*() are changed to call
> > rw_assert_{rd,wr}lock() as appropriate.  Finally, mi_switch() also
> > calls assertwaitok() now.
> > 
> > 
> > Index: kern/subr_xxx.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/subr_xxx.c,v
> > retrieving revision 1.10
> > diff -u -p -r1.10 subr_xxx.c
> > --- kern/subr_xxx.c 21 Sep 2010 01:09:10 -0000      1.10
> > +++ kern/subr_xxx.c 21 Sep 2010 01:45:59 -0000
> > @@ -153,6 +153,9 @@ blktochr(dev_t dev)
> >     return (NODEV);
> >  }
> >  
> > +#include <ddb/db_output.h>
> > +#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
> > +
> >  /*
> >   * Check that we're in a context where it's okay to sleep.
> >   */
> > @@ -160,4 +163,9 @@ void
> >  assertwaitok(void)
> >  {
> >     splassert(IPL_NONE);
> > +#ifdef __HAVE_CPU_MUTEX_LEVEL
> > +   if (curcpu()->ci_mutex_level != 0)
> > +           panic("assertwaitok: non-zero mutex count: %d",
> > +               curcpu()->ci_mutex_level);
> > +#endif
> >  }
> > Index: kern/kern_rwlock.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> > retrieving revision 1.15
> > diff -u -p -r1.15 kern_rwlock.c
> > --- kern/kern_rwlock.c      13 Aug 2009 23:12:15 -0000      1.15
> > +++ kern/kern_rwlock.c      21 Sep 2010 01:45:59 -0000
> > @@ -107,6 +107,8 @@ rw_exit_read(struct rwlock *rwl)
> >  {
> >     unsigned long owner = rwl->rwl_owner;
> >  
> > +   rw_assert_rdlock(rwl);
> > +
> >     if (__predict_false((owner & RWLOCK_WAIT) ||
> >         rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
> >             rw_exit(rwl);
> > @@ -117,6 +119,8 @@ rw_exit_write(struct rwlock *rwl)
> >  {
> >     unsigned long owner = rwl->rwl_owner;
> >  
> > +   rw_assert_wrlock(rwl);
> > +
> >     if (__predict_false((owner & RWLOCK_WAIT) ||
> >         rw_cas(&rwl->rwl_owner, owner, 0)))
> >             rw_exit(rwl);
> > @@ -236,6 +240,11 @@ rw_exit(struct rwlock *rwl)
> >     int wrlock = owner & RWLOCK_WRLOCK;
> >     unsigned long set;
> >  
> > +   if (wrlock)
> > +           rw_assert_wrlock(rwl);
> > +   else
> > +           rw_assert_rdlock(rwl);
> > +
> >     do {
> >             owner = rwl->rwl_owner;
> >             if (wrlock)
> > @@ -249,6 +258,10 @@ rw_exit(struct rwlock *rwl)
> >             wakeup(rwl);
> >  }
> >  
> > +#include <ddb/db_output.h>
> > +#define panic(x...) do { printf(x); db_stack_dump(); } while (0);
> > +
> > +#ifdef DIAGNOSTIC
> >  void
> >  rw_assert_wrlock(struct rwlock *rwl)
> >  {
> > @@ -272,3 +285,4 @@ rw_assert_unlocked(struct rwlock *rwl)
> >     if (rwl->rwl_owner != 0L)
> >             panic("%s: lock held", rwl->rwl_name);
> >  }
> > +#endif
> > Index: kern/sched_bsd.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> > retrieving revision 1.23
> > diff -u -p -r1.23 sched_bsd.c
> > --- kern/sched_bsd.c        30 Jun 2010 22:38:17 -0000      1.23
> > +++ kern/sched_bsd.c        21 Sep 2010 01:45:59 -0000
> > @@ -356,6 +356,7 @@ mi_switch(void)
> >     int sched_count;
> >  #endif
> >  
> > +   assertwaitok();
> >     KASSERT(p->p_stat != SONPROC);
> >  
> >     SCHED_ASSERT_LOCKED();
> > Index: arch/amd64/include/cpu.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> > retrieving revision 1.55
> > diff -u -p -r1.55 cpu.h
> > --- arch/amd64/include/cpu.h        11 Aug 2010 21:22:44 -0000      1.55
> > +++ arch/amd64/include/cpu.h        21 Sep 2010 01:46:00 -0000
> > @@ -87,6 +87,10 @@ struct cpu_info {
> >     int             ci_idepth;
> >     u_int32_t       ci_imask[NIPL];
> >     u_int32_t       ci_iunmask[NIPL];
> > +#ifdef DIAGNOSTIC
> > +   int             ci_mutex_level;
> > +#define __HAVE_CPU_MUTEX_LEVEL
> > +#endif
> >  
> >     u_int           ci_flags;
> >     u_int32_t       ci_ipis;
> > Index: arch/amd64/amd64/genassym.cf
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
> > retrieving revision 1.22
> > diff -u -p -r1.22 genassym.cf
> > --- arch/amd64/amd64/genassym.cf    1 Apr 2010 19:47:59 -0000       1.22
> > +++ arch/amd64/amd64/genassym.cf    21 Sep 2010 01:46:00 -0000
> > @@ -105,6 +105,9 @@ member  CPU_INFO_IDEPTH         ci_idepth
> >  member     CPU_INFO_ISOURCES       ci_isources
> >  member     CPU_INFO_IPENDING       ci_ipending
> >  member     CPU_INFO_IUNMASK        ci_iunmask
> > +ifdef DIAGNOSTIC
> > +member     CPU_INFO_MUTEX_LEVEL    ci_mutex_level
> > +endif
> >  member     CPU_INFO_GDT            ci_gdt
> >  
> >  struct     intrsource
> > Index: arch/amd64/amd64/mutex.S
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/mutex.S,v
> > retrieving revision 1.7
> > diff -u -p -r1.7 mutex.S
> > --- arch/amd64/amd64/mutex.S        13 Aug 2009 13:24:55 -0000      1.7
> > +++ arch/amd64/amd64/mutex.S        21 Sep 2010 01:46:00 -0000
> > @@ -62,6 +62,9 @@ ENTRY(mtx_enter)
> >     cmpxchgq        %rcx, MTX_OWNER(%rdi)   # test_and_set(mtx->mtx_owner)
> >     jne     2f
> >     movl    %edx, MTX_OLDIPL(%rdi)
> > +#ifdef DIAGNOSTIC
> > +   incl    CPU_INFO_MUTEX_LEVEL(%rcx)
> > +#endif
> >     ret
> >  
> >     /* We failed to obtain the lock. splx, spin and retry. */
> > @@ -103,6 +106,9 @@ ENTRY(mtx_enter_try)
> >     cmpxchgq        %rcx, MTX_OWNER(%rdi)   # test_and_set(mtx->mtx_owner)
> >     jne     2f
> >     movl    %edx, MTX_OLDIPL(%rdi)
> > +#ifdef DIAGNOSTIC
> > +   incl    CPU_INFO_MUTEX_LEVEL(%rcx)
> > +#endif
> >     movq    $1, %rax
> >     ret
> >  
> > @@ -122,12 +128,18 @@ ENTRY(mtx_enter_try)
> >  #ifdef DIAGNOSTIC
> >  3: movq    $4f, %rdi
> >     call    _C_LABEL(panic)
> > -4: .asciz  "mtx_enter: locking against myself"
> > +4: .asciz  "mtx_enter_try: locking against myself"
> >  #endif
> >  
> >  
> >  ENTRY(mtx_leave)
> >     movq    %rdi, %rax
> > +#ifdef DIAGNOSTIC
> > +   movq    CPUVAR(SELF), %rcx
> > +   cmpq    MTX_OWNER(%rax), %rcx
> > +   jne     2f
> > +   decl    CPU_INFO_MUTEX_LEVEL(%rcx)
> > +#endif
> >     xorq    %rcx, %rcx
> >     movl    MTX_OLDIPL(%rax), %edi
> >     movl    %ecx, MTX_OLDIPL(%rax)
> > @@ -137,3 +149,9 @@ ENTRY(mtx_leave)
> >     call    _C_LABEL(spllower)
> >  1:
> >     ret
> > +
> > +#ifdef DIAGNOSTIC
> > +2: movq    $3f, %rdi
> > +   call    _C_LABEL(panic)
> > +3: .asciz  "mtx_leave: lock not held"
> > +#endif
> > Index: arch/i386/include/cpu.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/i386/include/cpu.h,v
> > retrieving revision 1.114
> > diff -u -p -r1.114 cpu.h
> > --- arch/i386/include/cpu.h 11 Aug 2010 21:22:44 -0000      1.114
> > +++ arch/i386/include/cpu.h 21 Sep 2010 01:46:00 -0000
> > @@ -106,6 +106,10 @@ struct cpu_info {
> >     int             ci_idepth;
> >     u_int32_t       ci_imask[NIPL];
> >     u_int32_t       ci_iunmask[NIPL];
> > +#ifdef DIAGNOSTIC
> > +   int             ci_mutex_level;
> > +#define __HAVE_CPU_MUTEX_LEVEL
> > +#endif
> >  
> >     paddr_t         ci_idle_pcb_paddr; /* PA of idle PCB */
> >     u_long          ci_flags;       /* flags; see below */
> > Index: arch/i386/i386/genassym.cf
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/i386/i386/genassym.cf,v
> > retrieving revision 1.31
> > diff -u -p -r1.31 genassym.cf
> > --- arch/i386/i386/genassym.cf      3 Jul 2010 04:54:32 -0000       1.31
> > +++ arch/i386/i386/genassym.cf      21 Sep 2010 01:46:00 -0000
> > @@ -207,6 +207,9 @@ define  CPU_INFO_IUNMASK        offsetof(struct 
> >  define     CPU_INFO_ILEVEL         offsetof(struct cpu_info, ci_ilevel)
> >  define     CPU_INFO_IDEPTH         offsetof(struct cpu_info, ci_idepth)
> >  define     CPU_INFO_ISOURCES       offsetof(struct cpu_info, ci_isources)
> > +ifdef DIAGNOSTIC
> > +define     CPU_INFO_MUTEX_LEVEL    offsetof(struct cpu_info, ci_mutex_level)
> > +endif
> >  define     CPU_INFO_CURPMAP        offsetof(struct cpu_info, ci_curpmap)
> >  
> >  struct pmap
> > Index: arch/i386/i386/mutex.S
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/i386/i386/mutex.S,v
> > retrieving revision 1.7
> > diff -u -p -r1.7 mutex.S
> > --- arch/i386/i386/mutex.S  13 Aug 2009 13:24:55 -0000      1.7
> > +++ arch/i386/i386/mutex.S  21 Sep 2010 01:46:00 -0000
> > @@ -63,6 +63,9 @@ ENTRY(mtx_enter)
> >     testl   %eax, %eax              # if (already held)
> >     jnz     3f
> >     movl    CPUVAR(SELF), %eax
> > +#ifdef DIAGNOSTIC
> > +   incl    CPU_INFO_MUTEX_LEVEL(%eax)
> > +#endif
> >     movl    %eax, MTX_OWNER(%ecx)
> >     movl    %edx, MTX_OLDIPL(%ecx)
> >     leave
> > @@ -107,6 +110,9 @@ ENTRY(mtx_enter_try)
> >     testl   %eax, %eax              # if (already held)
> >     jnz     3f
> >     movl    CPUVAR(SELF), %eax
> > +#ifdef DIAGNOSTIC
> > +   incl    CPU_INFO_MUTEX_LEVEL(%eax)
> > +#endif
> >     movl    %eax, MTX_OWNER(%ecx)
> >     movl    %edx, MTX_OLDIPL(%ecx)
> >     movl    $1, %eax
> > @@ -130,7 +136,7 @@ ENTRY(mtx_enter_try)
> >  #ifdef DIAGNOSTIC
> >  4: pushl   $5f
> >     call    _C_LABEL(panic)
> > -5: .asciz  "mtx_enter: locking against myself"
> > +5: .asciz  "mtx_enter_try: locking against myself"
> >  #endif
> >  
> >     
> > @@ -138,6 +144,12 @@ ENTRY(mtx_leave)
> >     pushl   %ebp
> >     movl    %esp, %ebp
> >     movl    SOFF(%ebp), %ecx
> > +#ifdef DIAGNOSTIC
> > +   movl    CPUVAR(SELF), %eax
> > +   cmpl    %eax, MTX_OWNER(%ecx)
> > +   jne     1f
> > +   decl    CPU_INFO_MUTEX_LEVEL(%eax)
> > +#endif
> >     xorl    %eax, %eax
> >     movl    %eax, MTX_OWNER(%ecx)
> >     pushl   MTX_OLDIPL(%ecx)
> > @@ -146,3 +158,9 @@ ENTRY(mtx_leave)
> >     call    _C_LABEL(splx)
> >     leave
> >     ret
> > +
> > +#ifdef DIAGNOSTIC
> > +1: pushl   $2f
> > +   call    _C_LABEL(panic)
> > +2: .asciz  "mtx_leave: lock not held"
> > +#endif
> > Index: sys/rwlock.h
> > ===================================================================
> > RCS file: /cvs/src/sys/sys/rwlock.h,v
> > retrieving revision 1.12
> > diff -u -p -r1.12 rwlock.h
> > --- sys/rwlock.h    13 Aug 2009 21:22:29 -0000      1.12
> > +++ sys/rwlock.h    21 Sep 2010 01:46:00 -0000
> > @@ -95,9 +95,15 @@ void rw_enter_write(struct rwlock *);
> >  void rw_exit_read(struct rwlock *);
> >  void rw_exit_write(struct rwlock *);
> >  
> > +#ifdef DIAGNOSTIC
> >  void rw_assert_wrlock(struct rwlock *);
> >  void rw_assert_rdlock(struct rwlock *);
> >  void rw_assert_unlocked(struct rwlock *);
> > +#else
> > +#define rw_assert_wrlock(rwl) ((void)0)
> > +#define rw_assert_rdlock(rwl) ((void)0)
> > +#define rw_assert_unlocked(rwl) ((void)0)
> > +#endif
> >  
> >  int rw_enter(struct rwlock *, int);
> >  void rw_exit(struct rwlock *);

Reply via email to