Oops, sorry for the mix-up below. The e-mail got bounced to me by someone
and it used their 'From' instead of the original. Regardless, the
technical content stands. :)

On 3/2/20, Mateusz Guzik <mjgu...@gmail.com> wrote:
> On 2/29/20, Visa Hankala <wild.prickly.sh...@gmail.com> wrote:
>> There is a bug in how rw_exit_read() and rw_exit_write() use
>> WITNESS_UNLOCK(). The fast paths call WITNESS_UNLOCK() after releasing
>> the actual lock. This leads to a use-after-free in the checker if the
>> lock is dynamically allocated and another thread happens to free it
>> too early.
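>>
>> For reference, here is the pre-patch fast path with the problematic
>> ordering annotated (reconstructed from the lines removed in the diff
>> below; the annotation is illustrative only):
>>
>>         void
>>         rw_exit_read(struct rwlock *rwl)
>>         {
>>                 unsigned long owner = rwl->rwl_owner;
>>
>>                 rw_assert_rdlock(rwl);
>>
>>                 membar_exit_before_atomic();
>>                 if (__predict_false((owner & RWLOCK_WAIT) ||
>>                     rw_cas(&rwl->rwl_owner, owner,
>>                     owner - RWLOCK_READ_INCR)))
>>                         rw_exit(rwl);
>>                 else
>>                         /*
>>                          * The rw_cas() above has already released the
>>                          * lock, so another thread may acquire and free
>>                          * the rwlock here; the dereference below is
>>                          * then a use-after-free.
>>                          */
>>                         WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
>>         }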
>>
>> The following diff splits rw_exit() into two separate functions, one
>> which is the rw_exit(9) frontend, and the other, rw_do_exit(), which is
>> common between rw_exit(9), rw_exit_read(9) and rw_exit_write(9). This
>> makes it possible to call WITNESS_UNLOCK() before the actual lock
>> release without turning the code into an #ifdef maze.
>>
>> The diff additionally does the following tweaks:
>>
>> * Invoke membar_exit_before_atomic() only once even if the slow path is
>>   taken. The initial barrier is enough for the release semantics.
>>
>> * Read rwl_owner for rw_cas() as late as possible so that the CAS
>>   operation would be less likely to fail. Note that in rw_exit() the
>>   purpose of the read is different; it determines the type of the lock
>>   (read / write).
>>
>> * Put the rw_cas() in rw_exit() (now rw_do_exit()) inside
>>   __predict_false(). This compacts the resulting machine code a bit.
>>
>> The code could be simplified by removing the fast path from
>> rw_exit_read() and rw_exit_write(), and always calling rw_do_exit().
>> However, that would reduce performance by a tiny amount. I saw about 0.5%
>> increase in kernel build time in a test setup. The patch below does
>> not seem to affect performance negatively.
>>
>> OK?
>
> If the slowdown is repeatable it most likely happens because of the
> immediate re-read in the loop in the slow path.
>
> Also note performance bugs in unlock:
> - *any* read unlock wakes up everyone, so if the lock has many blocked
> waiters they will all wake up only to likely go back to sleep. On the
> other hand this anomaly may sometimes improve performance because in
> corner cases it can mitigate the lack of adaptive spinning
> - the loop keeps re-reading the word instead of using the value
> returned by cmpxchg; as it happens, the rw_cas macro explicitly throws
> it away (see the sketch below)
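>
> A minimal sketch of the second point for the read case, assuming the
> usual atomic_cas_ulong() interface that returns the previous value
> (illustrative only, not a drop-in replacement):
>
>         unsigned long owner, set, prev;
>
>         owner = rwl->rwl_owner;
>         for (;;) {
>                 set = (owner - RWLOCK_READ_INCR) &
>                     ~(RWLOCK_WAIT|RWLOCK_WRWANT);
>                 prev = atomic_cas_ulong(&rwl->rwl_owner, owner, set);
>                 if (prev == owner)
>                         break;
>                 /* Retry with the value the CAS returned, no re-read. */
>                 owner = prev;
>         }
>         if (owner & RWLOCK_WAIT)
>                 wakeup(rwl);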
>
> Splitting unlock paths between reader and writer cases is unavoidable
> in the long term (to allow handling different wake up policies) and I
> think this is a great opportunity to do it all the way.
>
> That said, I propose you do roughly this:
>
> static void __always_inline
> rw_exit_read_impl(struct rwlock *rwl, unsigned long v)
> {
>
>         rw_assert_rdlock(rwl);
>         WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
>
>         .... cas loop + wake up come here
> }
>
> void
> rw_exit_read(struct rwlock *rwl)
> {
>
>         membar_exit_before_atomic();
>         rw_exit_read_impl(rwl, rwl->rwl_owner);
> }
>
> void
> rw_exit(struct rwlock *rwl)
> {
>         unsigned long v;
>
>         membar_exit_before_atomic();
>         v = rwl->rwl_owner;
>         if (v & RWLOCK_WRLOCK)
>                 rw_exit_write_impl(rwl, v);
>         else
>                 rw_exit_read_impl(rwl, v);
> }
>
> And similarly for write.
>
> That is, the bottom routines assume the fence was posted and they got
> a "fresh" value.
>
>>
>> Index: kern/kern_rwlock.c
>> ===================================================================
>> RCS file: src/sys/kern/kern_rwlock.c,v
>> retrieving revision 1.44
>> diff -u -p -r1.44 kern_rwlock.c
>> --- kern/kern_rwlock.c       30 Nov 2019 11:19:17 -0000      1.44
>> +++ kern/kern_rwlock.c       29 Feb 2020 16:24:59 -0000
>> @@ -25,6 +25,8 @@
>>  #include <sys/atomic.h>
>>  #include <sys/witness.h>
>>
>> +void        rw_do_exit(struct rwlock *, unsigned long);
>> +
>>  /* XXX - temporary measure until proc0 is properly aligned */
>>  #define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)
>>
>> @@ -129,31 +131,31 @@ rw_enter_write(struct rwlock *rwl)
>>  void
>>  rw_exit_read(struct rwlock *rwl)
>>  {
>> -    unsigned long owner = rwl->rwl_owner;
>> +    unsigned long owner;
>>
>>      rw_assert_rdlock(rwl);
>> +    WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
>>
>>      membar_exit_before_atomic();
>> +    owner = rwl->rwl_owner;
>>      if (__predict_false((owner & RWLOCK_WAIT) ||
>>          rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
>> -            rw_exit(rwl);
>> -    else
>> -            WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
>> +            rw_do_exit(rwl, 0);
>>  }
>>
>>  void
>>  rw_exit_write(struct rwlock *rwl)
>>  {
>> -    unsigned long owner = rwl->rwl_owner;
>> +    unsigned long owner;
>>
>>      rw_assert_wrlock(rwl);
>> +    WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
>>
>>      membar_exit_before_atomic();
>> +    owner = rwl->rwl_owner;
>>      if (__predict_false((owner & RWLOCK_WAIT) ||
>>          rw_cas(&rwl->rwl_owner, owner, 0)))
>> -            rw_exit(rwl);
>> -    else
>> -            WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
>> +            rw_do_exit(rwl, RWLOCK_WRLOCK);
>>  }
>>
>>  #ifdef DIAGNOSTIC
>> @@ -314,22 +316,29 @@ retry:
>>  void
>>  rw_exit(struct rwlock *rwl)
>>  {
>> -    unsigned long owner = rwl->rwl_owner;
>> -    int wrlock = owner & RWLOCK_WRLOCK;
>> -    unsigned long set;
>> +    unsigned long wrlock;
>>
>>      /* Avoid deadlocks after panic or in DDB */
>>      if (panicstr || db_active)
>>              return;
>>
>> +    wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
>>      if (wrlock)
>>              rw_assert_wrlock(rwl);
>>      else
>>              rw_assert_rdlock(rwl);
>> -
>>      WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);
>>
>>      membar_exit_before_atomic();
>> +    rw_do_exit(rwl, wrlock);
>> +}
>> +
>> +/* membar_exit_before_atomic() has to precede call of this function. */
>> +void
>> +rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
>> +{
>> +    unsigned long owner, set;
>> +
>>      do {
>>              owner = rwl->rwl_owner;
>>              if (wrlock)
>> @@ -337,7 +346,7 @@ rw_exit(struct rwlock *rwl)
>>              else
>>                      set = (owner - RWLOCK_READ_INCR) &
>>                              ~(RWLOCK_WAIT|RWLOCK_WRWANT);
>> -    } while (rw_cas(&rwl->rwl_owner, owner, set));
>> +    } while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));
>>
>>      if (owner & RWLOCK_WAIT)
>>              wakeup(rwl);
>>
>>
>
>
> --
> Mateusz Guzik <mjguzik gmail.com>
>


-- 
Mateusz Guzik <mjguzik gmail.com>
