There is a bug in how rw_exit_read() and rw_exit_write() use
WITNESS_UNLOCK(). The fast paths call WITNESS_UNLOCK() after releasing
the actual lock. This leads to a use-after-free in the checker if the
lock is dynamically allocated and another thread happens to free it
too early.

The following diff splits rw_exit() into two separate functions, one
which is the rw_exit(9) frontend, and the other, rw_do_exit(), which is
common between rw_exit(9), rw_exit_read(9) and rw_exit_write(9). This
makes it possible to call WITNESS_UNLOCK() before the actual lock
release without turning the code into an #ifdef maze.

The diff additionally does the following tweaks:

* Invoke membar_exit_before_atomic() only once even if the slow path is
  taken. The initial barrier is enough for the release semantics.

* Read rwl_owner for rw_cas() as late as possible so that the CAS
  operation would be less likely to fail. Note that in rw_exit() the
  purpose of the read is different; it determines the type of the lock
  (read / write).

* Put the rw_cas() in rw_exit() (now rw_do_exit()) inside
  __predict_false(). This compacts the resulting machine code a bit.

The code could be simplified by removing the fast path from
rw_exit_read() and rw_exit_write(), and always calling rw_do_exit().
However, that would reduce performance a tiny amount. I saw about a
0.5% increase in kernel build time in a test setup. The patch below does
not seem to affect performance negatively.

OK?

Index: kern/kern_rwlock.c
===================================================================
RCS file: src/sys/kern/kern_rwlock.c,v
retrieving revision 1.44
diff -u -p -r1.44 kern_rwlock.c
--- kern/kern_rwlock.c  30 Nov 2019 11:19:17 -0000      1.44
+++ kern/kern_rwlock.c  29 Feb 2020 16:24:59 -0000
@@ -25,6 +25,8 @@
 #include <sys/atomic.h>
 #include <sys/witness.h>
 
+void   rw_do_exit(struct rwlock *, unsigned long);
+
 /* XXX - temporary measure until proc0 is properly aligned */
 #define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)
 
@@ -129,31 +131,31 @@ rw_enter_write(struct rwlock *rwl)
 void
 rw_exit_read(struct rwlock *rwl)
 {
-       unsigned long owner = rwl->rwl_owner;
+       unsigned long owner;
 
        rw_assert_rdlock(rwl);
+       WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
 
        membar_exit_before_atomic();
+       owner = rwl->rwl_owner;
        if (__predict_false((owner & RWLOCK_WAIT) ||
            rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
-               rw_exit(rwl);
-       else
-               WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
+               rw_do_exit(rwl, 0);
 }
 
 void
 rw_exit_write(struct rwlock *rwl)
 {
-       unsigned long owner = rwl->rwl_owner;
+       unsigned long owner;
 
        rw_assert_wrlock(rwl);
+       WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
 
        membar_exit_before_atomic();
+       owner = rwl->rwl_owner;
        if (__predict_false((owner & RWLOCK_WAIT) ||
            rw_cas(&rwl->rwl_owner, owner, 0)))
-               rw_exit(rwl);
-       else
-               WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
+               rw_do_exit(rwl, RWLOCK_WRLOCK);
 }
 
 #ifdef DIAGNOSTIC
@@ -314,22 +316,29 @@ retry:
 void
 rw_exit(struct rwlock *rwl)
 {
-       unsigned long owner = rwl->rwl_owner;
-       int wrlock = owner & RWLOCK_WRLOCK;
-       unsigned long set;
+       unsigned long wrlock;
 
        /* Avoid deadlocks after panic or in DDB */
        if (panicstr || db_active)
                return;
 
+       wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
        if (wrlock)
                rw_assert_wrlock(rwl);
        else
                rw_assert_rdlock(rwl);
-
        WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);
 
        membar_exit_before_atomic();
+       rw_do_exit(rwl, wrlock);
+}
+
+/* membar_exit_before_atomic() has to precede call of this function. */
+void
+rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
+{
+       unsigned long owner, set;
+
        do {
                owner = rwl->rwl_owner;
                if (wrlock)
@@ -337,7 +346,7 @@ rw_exit(struct rwlock *rwl)
                else
                        set = (owner - RWLOCK_READ_INCR) &
                                ~(RWLOCK_WAIT|RWLOCK_WRWANT);
-       } while (rw_cas(&rwl->rwl_owner, owner, set));
+       } while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));
 
        if (owner & RWLOCK_WAIT)
                wakeup(rwl);

Reply via email to