Re: rwlock tweak
> Date: Mon, 16 Oct 2017 11:53:30 +0200 > From: Martin Pieuchot > > On 16/10/17(Mon) 11:36, Mark Kettenis wrote: > > > Date: Mon, 16 Oct 2017 10:52:09 +0200 > > > From: Martin Pieuchot > > > > > > As pointed by Mateusz Guzik [0], on x86 the cmpxchg{q,l} used by > > > rw_enter(9) and rw_exit(9) already include an implicit memory > > > barrier, so we can avoids using an explicit expensive one by > > > using the following variants. > > > > > > [0] https://marc.info/?l=openbsd-tech&m=150765959923113&w=2 > > > > > > ok? > > > > Is this really safe? The atomic instructions are executed > > conditionally... > > I guess you're talking about the "if ()" dance in the "optimized" > versions: rw_{enter,exit}_{read,write}(). If no cmpxchg{q,l} is > executed in theses functions _rw_{enter,exit}() will be called, > which contains another barrier and another CAS. Then it should be fine. > > > Index: kern/kern_rwlock.c > > > === > > > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > > > retrieving revision 1.31 > > > diff -u -p -r1.31 kern_rwlock.c > > > --- kern/kern_rwlock.c12 Oct 2017 09:19:45 - 1.31 > > > +++ kern/kern_rwlock.c16 Oct 2017 08:24:27 - > > > @@ -96,7 +96,7 @@ _rw_enter_read(struct rwlock *rwl LOCK_F > > > rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) > > > _rw_enter(rwl, RW_READ LOCK_FL_ARGS); > > > else { > > > - membar_enter(); > > > + membar_enter_after_atomic(); > > > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, > > > NULL); > > > WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); > > > @@ -112,7 +112,7 @@ _rw_enter_write(struct rwlock *rwl LOCK_ > > > RW_PROC(p) | RWLOCK_WRLOCK))) > > > _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); > > > else { > > > - membar_enter(); > > > + membar_enter_after_atomic(); > > > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, > > > LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); > > > WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); > > > @@ -126,7 +126,7 @@ _rw_exit_read(struct rwlock *rwl LOCK_FL > > 
> > > > rw_assert_rdlock(rwl); > > > > > > - membar_exit(); > > > + membar_exit_before_atomic(); > > > if (__predict_false((owner & RWLOCK_WAIT) || > > > rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR))) > > > _rw_exit(rwl LOCK_FL_ARGS); > > > @@ -141,7 +141,7 @@ _rw_exit_write(struct rwlock *rwl LOCK_F > > > > > > rw_assert_wrlock(rwl); > > > > > > - membar_exit(); > > > + membar_exit_before_atomic(); > > > if (__predict_false((owner & RWLOCK_WAIT) || > > > rw_cas(&rwl->rwl_owner, owner, 0))) > > > _rw_exit(rwl LOCK_FL_ARGS); > > > @@ -261,7 +261,7 @@ retry: > > > > > > if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) > > > goto retry; > > > - membar_enter(); > > > + membar_enter_after_atomic(); > > > > > > /* > > >* If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we > > > @@ -295,7 +295,7 @@ _rw_exit(struct rwlock *rwl LOCK_FL_VARS > > > WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0, > > > file, line); > > > > > > - membar_exit(); > > > + membar_exit_before_atomic(); > > > do { > > > owner = rwl->rwl_owner; > > > if (wrlock) > > > > > > > > >
Re: rwlock tweak
On Mon, Oct 16, 2017 at 09:36 +0000, Mark Kettenis wrote: > > Date: Mon, 16 Oct 2017 10:52:09 +0200 > > From: Martin Pieuchot > > > > As pointed out by Mateusz Guzik [0], on x86 the cmpxchg{q,l} used by > > rw_enter(9) and rw_exit(9) already include an implicit memory > > barrier, so we can avoid using an explicit expensive one by > > using the following variants. > > > > [0] https://marc.info/?l=openbsd-tech&m=150765959923113&w=2 > > > > ok? > > Is this really safe? The atomic instructions are executed > conditionally... > I've been running without membars on amd64 for about 3 weeks now and haven't noticed any adverse effects. On my laptop and APU2 serving internet for a hundred people. It appears to me that the access pattern in rwlock.c allows for that: no reads that matter get reordered. But please take this with a grain of salt. A few people noted that a single membar per packet hugely affects forwarding performance, so a macro-benchmark can be set up to quantify the cost. > > Index: kern/kern_rwlock.c > > === > > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > > retrieving revision 1.31 > > diff -u -p -r1.31 kern_rwlock.c > > --- kern/kern_rwlock.c 12 Oct 2017 09:19:45 - 1.31 > > +++ kern/kern_rwlock.c 16 Oct 2017 08:24:27 - > > @@ -96,7 +96,7 @@ _rw_enter_read(struct rwlock *rwl LOCK_F > > rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) > > _rw_enter(rwl, RW_READ LOCK_FL_ARGS); > > else { > > - membar_enter(); > > + membar_enter_after_atomic(); > > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, > > NULL); > > WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); > > @@ -112,7 +112,7 @@ _rw_enter_write(struct rwlock *rwl LOCK_ > > RW_PROC(p) | RWLOCK_WRLOCK))) > > _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); > > else { > > - membar_enter(); > > + membar_enter_after_atomic(); > > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, > > LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); > > WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); > > @@ -126,7 
+126,7 @@ _rw_exit_read(struct rwlock *rwl LOCK_FL > > > > rw_assert_rdlock(rwl); > > > > - membar_exit(); > > + membar_exit_before_atomic(); > > if (__predict_false((owner & RWLOCK_WAIT) || > > rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR))) > > _rw_exit(rwl LOCK_FL_ARGS); > > @@ -141,7 +141,7 @@ _rw_exit_write(struct rwlock *rwl LOCK_F > > > > rw_assert_wrlock(rwl); > > > > - membar_exit(); > > + membar_exit_before_atomic(); > > if (__predict_false((owner & RWLOCK_WAIT) || > > rw_cas(&rwl->rwl_owner, owner, 0))) > > _rw_exit(rwl LOCK_FL_ARGS); > > @@ -261,7 +261,7 @@ retry: > > > > if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) > > goto retry; > > - membar_enter(); > > + membar_enter_after_atomic(); > > > > /* > > * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we > > @@ -295,7 +295,7 @@ _rw_exit(struct rwlock *rwl LOCK_FL_VARS > > WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0, > > file, line); > > > > - membar_exit(); > > + membar_exit_before_atomic(); > > do { > > owner = rwl->rwl_owner; > > if (wrlock) > > > > >
Re: rwlock tweak
On 16/10/17(Mon) 11:36, Mark Kettenis wrote: > > Date: Mon, 16 Oct 2017 10:52:09 +0200 > > From: Martin Pieuchot > > > > As pointed out by Mateusz Guzik [0], on x86 the cmpxchg{q,l} used by > > rw_enter(9) and rw_exit(9) already include an implicit memory > > barrier, so we can avoid using an explicit expensive one by > > using the following variants. > > > > [0] https://marc.info/?l=openbsd-tech&m=150765959923113&w=2 > > > > ok? > > Is this really safe? The atomic instructions are executed > conditionally... I guess you're talking about the "if ()" dance in the "optimized" versions: rw_{enter,exit}_{read,write}(). If no cmpxchg{q,l} is executed in these functions _rw_{enter,exit}() will be called, which contains another barrier and another CAS. > > Index: kern/kern_rwlock.c > > === > > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > > retrieving revision 1.31 > > diff -u -p -r1.31 kern_rwlock.c > > --- kern/kern_rwlock.c 12 Oct 2017 09:19:45 - 1.31 > > +++ kern/kern_rwlock.c 16 Oct 2017 08:24:27 - > > @@ -96,7 +96,7 @@ _rw_enter_read(struct rwlock *rwl LOCK_F > > rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) > > _rw_enter(rwl, RW_READ LOCK_FL_ARGS); > > else { > > - membar_enter(); > > + membar_enter_after_atomic(); > > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, > > NULL); > > WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); > > @@ -112,7 +112,7 @@ _rw_enter_write(struct rwlock *rwl LOCK_ > > RW_PROC(p) | RWLOCK_WRLOCK))) > > _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); > > else { > > - membar_enter(); > > + membar_enter_after_atomic(); > > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, > > LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); > > WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); > > @@ -126,7 +126,7 @@ _rw_exit_read(struct rwlock *rwl LOCK_FL > > > > rw_assert_rdlock(rwl); > > > > - membar_exit(); > > + membar_exit_before_atomic(); > > if (__predict_false((owner & RWLOCK_WAIT) || > > rw_cas(&rwl->rwl_owner, owner, owner - 
RWLOCK_READ_INCR))) > > _rw_exit(rwl LOCK_FL_ARGS); > > @@ -141,7 +141,7 @@ _rw_exit_write(struct rwlock *rwl LOCK_F > > > > rw_assert_wrlock(rwl); > > > > - membar_exit(); > > + membar_exit_before_atomic(); > > if (__predict_false((owner & RWLOCK_WAIT) || > > rw_cas(&rwl->rwl_owner, owner, 0))) > > _rw_exit(rwl LOCK_FL_ARGS); > > @@ -261,7 +261,7 @@ retry: > > > > if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) > > goto retry; > > - membar_enter(); > > + membar_enter_after_atomic(); > > > > /* > > * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we > > @@ -295,7 +295,7 @@ _rw_exit(struct rwlock *rwl LOCK_FL_VARS > > WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0, > > file, line); > > > > - membar_exit(); > > + membar_exit_before_atomic(); > > do { > > owner = rwl->rwl_owner; > > if (wrlock) > > > > >
Re: rwlock tweak
> Date: Mon, 16 Oct 2017 10:52:09 +0200 > From: Martin Pieuchot > > As pointed by Mateusz Guzik [0], on x86 the cmpxchg{q,l} used by > rw_enter(9) and rw_exit(9) already include an implicit memory > barrier, so we can avoids using an explicit expensive one by > using the following variants. > > [0] https://marc.info/?l=openbsd-tech&m=150765959923113&w=2 > > ok? Is this really safe? The atomic instructions are executed conditionally... > Index: kern/kern_rwlock.c > === > RCS file: /cvs/src/sys/kern/kern_rwlock.c,v > retrieving revision 1.31 > diff -u -p -r1.31 kern_rwlock.c > --- kern/kern_rwlock.c12 Oct 2017 09:19:45 - 1.31 > +++ kern/kern_rwlock.c16 Oct 2017 08:24:27 - > @@ -96,7 +96,7 @@ _rw_enter_read(struct rwlock *rwl LOCK_F > rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) > _rw_enter(rwl, RW_READ LOCK_FL_ARGS); > else { > - membar_enter(); > + membar_enter_after_atomic(); > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, > NULL); > WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); > @@ -112,7 +112,7 @@ _rw_enter_write(struct rwlock *rwl LOCK_ > RW_PROC(p) | RWLOCK_WRLOCK))) > _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); > else { > - membar_enter(); > + membar_enter_after_atomic(); > WITNESS_CHECKORDER(&rwl->rwl_lock_obj, > LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); > WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); > @@ -126,7 +126,7 @@ _rw_exit_read(struct rwlock *rwl LOCK_FL > > rw_assert_rdlock(rwl); > > - membar_exit(); > + membar_exit_before_atomic(); > if (__predict_false((owner & RWLOCK_WAIT) || > rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR))) > _rw_exit(rwl LOCK_FL_ARGS); > @@ -141,7 +141,7 @@ _rw_exit_write(struct rwlock *rwl LOCK_F > > rw_assert_wrlock(rwl); > > - membar_exit(); > + membar_exit_before_atomic(); > if (__predict_false((owner & RWLOCK_WAIT) || > rw_cas(&rwl->rwl_owner, owner, 0))) > _rw_exit(rwl LOCK_FL_ARGS); > @@ -261,7 +261,7 @@ retry: > > if 
(__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) > goto retry; > - membar_enter(); > + membar_enter_after_atomic(); > > /* >* If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we > @@ -295,7 +295,7 @@ _rw_exit(struct rwlock *rwl LOCK_FL_VARS > WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0, > file, line); > > - membar_exit(); > + membar_exit_before_atomic(); > do { > owner = rwl->rwl_owner; > if (wrlock) > >
rwlock tweak
As pointed out by Mateusz Guzik [0], on x86 the cmpxchg{q,l} used by rw_enter(9) and rw_exit(9) already include an implicit memory barrier, so we can avoid using an explicit expensive one by using the following variants. [0] https://marc.info/?l=openbsd-tech&m=150765959923113&w=2 ok? Index: kern/kern_rwlock.c === RCS file: /cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.31 diff -u -p -r1.31 kern_rwlock.c --- kern/kern_rwlock.c 12 Oct 2017 09:19:45 - 1.31 +++ kern/kern_rwlock.c 16 Oct 2017 08:24:27 - @@ -96,7 +96,7 @@ _rw_enter_read(struct rwlock *rwl LOCK_F rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) _rw_enter(rwl, RW_READ LOCK_FL_ARGS); else { - membar_enter(); + membar_enter_after_atomic(); WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, file, line, NULL); WITNESS_LOCK(&rwl->rwl_lock_obj, 0, file, line); @@ -112,7 +112,7 @@ _rw_enter_write(struct rwlock *rwl LOCK_ RW_PROC(p) | RWLOCK_WRLOCK))) _rw_enter(rwl, RW_WRITE LOCK_FL_ARGS); else { - membar_enter(); + membar_enter_after_atomic(); WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_EXCLUSIVE | LOP_NEWORDER, file, line, NULL); WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE, file, line); @@ -126,7 +126,7 @@ _rw_exit_read(struct rwlock *rwl LOCK_FL rw_assert_rdlock(rwl); - membar_exit(); + membar_exit_before_atomic(); if (__predict_false((owner & RWLOCK_WAIT) || rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR))) _rw_exit(rwl LOCK_FL_ARGS); @@ -141,7 +141,7 @@ _rw_exit_write(struct rwlock *rwl LOCK_F rw_assert_wrlock(rwl); - membar_exit(); + membar_exit_before_atomic(); if (__predict_false((owner & RWLOCK_WAIT) || rw_cas(&rwl->rwl_owner, owner, 0))) _rw_exit(rwl LOCK_FL_ARGS); @@ -261,7 +261,7 @@ retry: if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc))) goto retry; - membar_enter(); + membar_enter_after_atomic(); /* * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we @@ -295,7 +295,7 @@ _rw_exit(struct rwlock *rwl LOCK_FL_VARS WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock 
? LOP_EXCLUSIVE : 0, file, line); - membar_exit(); + membar_exit_before_atomic(); do { owner = rwl->rwl_owner; if (wrlock)