Summer is really warm here. No need to make my machines hotter by spinning a lot. So here's a futex(2) based pthread_rwlock* implementation. It should look familiar to those of you that reviewed the mutex & semaphore implementations. It uses atomic cas & inc/dec.
I moved the existing implementation to rthread_rwlock_compat.c to match what has been done for semaphores. Tests, benchmarks and reviews are more than welcome! Index: libc/include/thread_private.h =================================================================== RCS file: /cvs/src/lib/libc/include/thread_private.h,v retrieving revision 1.34 diff -u -p -r1.34 thread_private.h --- libc/include/thread_private.h 10 Jan 2019 18:45:33 -0000 1.34 +++ libc/include/thread_private.h 23 Jan 2019 19:34:36 -0000 @@ -298,6 +298,11 @@ struct pthread_cond { struct pthread_mutex *mutex; }; +struct pthread_rwlock { + volatile unsigned int value; + volatile unsigned int waiters; +}; + #else struct pthread_mutex { @@ -314,6 +319,13 @@ struct pthread_cond { struct pthread_queue waiters; struct pthread_mutex *mutex; clockid_t clock; +}; + +struct pthread_rwlock { + _atomic_lock_t lock; + pthread_t owner; + struct pthread_queue writers; + int readers; }; #endif /* FUTEX */ Index: librthread/Makefile =================================================================== RCS file: /cvs/src/lib/librthread/Makefile,v retrieving revision 1.54 diff -u -p -r1.54 Makefile --- librthread/Makefile 12 Jan 2019 00:16:03 -0000 1.54 +++ librthread/Makefile 23 Jan 2019 20:07:29 -0000 @@ -27,7 +27,6 @@ SRCS= rthread.c \ rthread_mutex_prio.c \ rthread_mutexattr.c \ rthread_np.c \ - rthread_rwlock.c \ rthread_rwlockattr.c \ rthread_sched.c \ rthread_stack.c \ @@ -40,9 +39,12 @@ SRCS= rthread.c \ ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "mips64" || \ ${MACHINE_ARCH} == "mips64el" || ${MACHINE_ARCH} == "powerpc" || \ ${MACHINE_ARCH} == "sparc64" -SRCS+= rthread_sem.c +CFLAGS+= -DFUTEX +SRCS+= rthread_sem.c \ + rthread_rwlock.c .else -SRCS+= rthread_sem_compat.c +SRCS+= rthread_sem_compat.c \ + rthread_rwlock_compat.c .endif SRCDIR= ${.CURDIR}/../libpthread Index: librthread/rthread.h =================================================================== RCS file: /cvs/src/lib/librthread/rthread.h,v 
retrieving revision 1.63 diff -u -p -r1.63 rthread.h --- librthread/rthread.h 5 Sep 2017 02:40:54 -0000 1.63 +++ librthread/rthread.h 23 Jan 2019 17:02:51 -0000 @@ -52,13 +52,6 @@ struct stack { #define PTHREAD_MAX_PRIORITY 31 -struct pthread_rwlock { - _atomic_lock_t lock; - pthread_t owner; - struct pthread_queue writers; - int readers; -}; - struct pthread_rwlockattr { int pshared; }; Index: librthread/rthread_rwlock.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_rwlock.c,v retrieving revision 1.11 diff -u -p -r1.11 rthread_rwlock.c --- librthread/rthread_rwlock.c 24 Apr 2018 16:28:42 -0000 1.11 +++ librthread/rthread_rwlock.c 23 Jan 2019 20:30:22 -0000 @@ -1,8 +1,7 @@ /* $OpenBSD: rthread_rwlock.c,v 1.11 2018/04/24 16:28:42 pirofti Exp $ */ /* - * Copyright (c) 2004,2005 Ted Unangst <t...@openbsd.org> + * Copyright (c) 2018 Martin Pieuchot <m...@openbsd.org> * Copyright (c) 2012 Philip Guenther <guent...@openbsd.org> - * All Rights Reserved. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -16,11 +15,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -/* - * rwlocks - */ -#include <assert.h> #include <stdlib.h> #include <unistd.h> #include <errno.h> @@ -28,6 +23,20 @@ #include <pthread.h> #include "rthread.h" +#include "synch.h" + +#define UNLOCKED 0 +#define MAXREADER 0x7ffffffe +#define WRITER 0x7fffffff +#define WAITING 0x80000000 +#define COUNT(v) ((v) & WRITER) + +#define SPIN_COUNT 128 +#if defined(__i386__) || defined(__amd64__) +#define SPIN_WAIT() asm volatile("pause": : : "memory") +#else +#define SPIN_WAIT() do { } while (0) +#endif static _atomic_lock_t rwlock_init_lock = _SPINLOCK_UNLOCKED; @@ -35,15 +44,13 @@ int pthread_rwlock_init(pthread_rwlock_t *lockp, const pthread_rwlockattr_t *attrp __unused) { - pthread_rwlock_t lock; + pthread_rwlock_t rwlock; - lock = calloc(1, sizeof(*lock)); - if (!lock) + rwlock = calloc(1, sizeof(*rwlock)); + if (!rwlock) return (errno); - lock->lock = _SPINLOCK_UNLOCKED; - TAILQ_INIT(&lock->writers); - *lockp = lock; + *lockp = rwlock; return (0); } @@ -52,26 +59,25 @@ DEF_STD(pthread_rwlock_init); int pthread_rwlock_destroy(pthread_rwlock_t *lockp) { - pthread_rwlock_t lock; + pthread_rwlock_t rwlock; - assert(lockp); - lock = *lockp; - if (lock) { - if (lock->readers || !TAILQ_EMPTY(&lock->writers)) { + rwlock = *lockp; + if (rwlock) { + if (rwlock->value != 0 || rwlock->waiters > 0) { #define MSG "pthread_rwlock_destroy on rwlock with waiters!\n" write(2, MSG, sizeof(MSG) - 1); #undef MSG return (EBUSY); } - free(lock); + free((void *)rwlock); + *lockp = NULL; } - *lockp = NULL; return (0); } static int -_rthread_rwlock_ensure_init(pthread_rwlock_t *lockp) +_rthread_rwlock_ensure_init(pthread_rwlock_t *rwlockp) { int ret = 0; @@ -79,182 +85,200 @@ _rthread_rwlock_ensure_init(pthread_rwlo * If the rwlock is statically initialized, perform the dynamic * initialization. 
*/ - if (*lockp == NULL) - { + if (*rwlockp == NULL) { _spinlock(&rwlock_init_lock); - if (*lockp == NULL) - ret = pthread_rwlock_init(lockp, NULL); + if (*rwlockp == NULL) + ret = pthread_rwlock_init(rwlockp, NULL); _spinunlock(&rwlock_init_lock); } return (ret); } +static int +_rthread_rwlock_tryrdlock(pthread_rwlock_t rwlock) +{ + unsigned int val; + + do { + val = rwlock->value; + if (COUNT(val) == WRITER) + return (EBUSY); + if (COUNT(val) == MAXREADER) + return (EAGAIN); + } while (atomic_cas_uint(&rwlock->value, val, val + 1) != val); + + membar_enter_after_atomic(); + return (0); +} static int -_rthread_rwlock_rdlock(pthread_rwlock_t *lockp, const struct timespec *abstime, - int try) +_rthread_rwlock_timedrdlock(pthread_rwlock_t *rwlockp, int trywait, + const struct timespec *abs, int timed) { - pthread_rwlock_t lock; - pthread_t thread = pthread_self(); - int error; + pthread_t self = pthread_self(); + pthread_rwlock_t rwlock; + unsigned int val, new; + int i, error; - if ((error = _rthread_rwlock_ensure_init(lockp))) + if ((error = _rthread_rwlock_ensure_init(rwlockp))) return (error); - lock = *lockp; - _rthread_debug(5, "%p: rwlock_rdlock %p\n", (void *)thread, - (void *)lock); - _spinlock(&lock->lock); - - /* writers have precedence */ - if (lock->owner == NULL && TAILQ_EMPTY(&lock->writers)) - lock->readers++; - else if (try) - error = EBUSY; - else if (lock->owner == thread) - error = EDEADLK; - else { - do { - if (__thrsleep(lock, CLOCK_REALTIME, abstime, - &lock->lock, NULL) == EWOULDBLOCK) - return (ETIMEDOUT); - _spinlock(&lock->lock); - } while (lock->owner != NULL || !TAILQ_EMPTY(&lock->writers)); - lock->readers++; + rwlock = *rwlockp; + _rthread_debug(5, "%p: rwlock_%srdlock %p (%u)\n", self, + (timed ? "timed" : (trywait ? "try" : "")), (void *)rwlock, + rwlock->value); + + error = _rthread_rwlock_tryrdlock(rwlock); + if (error != EBUSY || trywait) + return (error); + + /* Try hard to not enter the kernel. 
*/ + for (i = 0; i < SPIN_COUNT; i ++) { + if (rwlock->value == UNLOCKED || rwlock->waiters != 0) + break; + + SPIN_WAIT(); + } + + while ((error = _rthread_rwlock_tryrdlock(rwlock)) == EBUSY) { + val = rwlock->value; + if (val == UNLOCKED || (COUNT(val)) != WRITER) + continue; + new = val | WAITING; + atomic_inc_int(&rwlock->waiters); + if (atomic_cas_uint(&rwlock->value, val, new) == val) { + membar_enter_after_atomic(); + error = _twait(&rwlock->value, new, CLOCK_REALTIME, + abs); + } + atomic_dec_int(&rwlock->waiters); + if (error == ETIMEDOUT) + break; } - _spinunlock(&lock->lock); return (error); + } int -pthread_rwlock_rdlock(pthread_rwlock_t *lockp) +pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlockp) { - return (_rthread_rwlock_rdlock(lockp, NULL, 0)); + return (_rthread_rwlock_timedrdlock(rwlockp, 1, NULL, 0)); } int -pthread_rwlock_tryrdlock(pthread_rwlock_t *lockp) +pthread_rwlock_timedrdlock(pthread_rwlock_t *rwlockp, + const struct timespec *abs) { - return (_rthread_rwlock_rdlock(lockp, NULL, 1)); + return (_rthread_rwlock_timedrdlock(rwlockp, 0, abs, 1)); } int -pthread_rwlock_timedrdlock(pthread_rwlock_t *lockp, - const struct timespec *abstime) +pthread_rwlock_rdlock(pthread_rwlock_t *rwlockp) { - if (abstime == NULL || abstime->tv_nsec < 0 || - abstime->tv_nsec >= 1000000000) - return (EINVAL); - return (_rthread_rwlock_rdlock(lockp, abstime, 0)); + return (_rthread_rwlock_timedrdlock(rwlockp, 0, NULL, 0)); +} + +static int +_rthread_rwlock_tryrwlock(pthread_rwlock_t rwlock) +{ + if (atomic_cas_uint(&rwlock->value, UNLOCKED, WRITER) != UNLOCKED) + return (EBUSY); + + membar_enter_after_atomic(); + return (0); } static int -_rthread_rwlock_wrlock(pthread_rwlock_t *lockp, const struct timespec *abstime, - int try) +_rthread_rwlock_timedwrlock(pthread_rwlock_t *rwlockp, int trywait, + const struct timespec *abs, int timed) { - pthread_rwlock_t lock; - pthread_t thread = pthread_self(); - int error; + pthread_t self = pthread_self(); + 
pthread_rwlock_t rwlock; + unsigned int val, new; + int i, error; - if ((error = _rthread_rwlock_ensure_init(lockp))) + if ((error = _rthread_rwlock_ensure_init(rwlockp))) return (error); - lock = *lockp; + rwlock = *rwlockp; + _rthread_debug(5, "%p: rwlock_%swrlock %p (%u)\n", self, + (timed ? "timed" : (trywait ? "try" : "")), (void *)rwlock, + rwlock->value); - _rthread_debug(5, "%p: rwlock_timedwrlock %p\n", (void *)thread, - (void *)lock); - _spinlock(&lock->lock); - if (lock->readers == 0 && lock->owner == NULL) - lock->owner = thread; - else if (try) - error = EBUSY; - else if (lock->owner == thread) - error = EDEADLK; - else { - int do_wait; - - /* gotta block */ - TAILQ_INSERT_TAIL(&lock->writers, thread, waiting); - do { - do_wait = __thrsleep(thread, CLOCK_REALTIME, abstime, - &lock->lock, NULL) != EWOULDBLOCK; - _spinlock(&lock->lock); - } while (lock->owner != thread && do_wait); - - if (lock->owner != thread) { - /* timed out, sigh */ - TAILQ_REMOVE(&lock->writers, thread, waiting); - error = ETIMEDOUT; + error = _rthread_rwlock_tryrwlock(rwlock); + if (error != EBUSY || trywait) + return (error); + + /* Try hard to not enter the kernel. 
*/ + for (i = 0; i < SPIN_COUNT; i ++) { + if (rwlock->value == UNLOCKED || rwlock->waiters != 0) + break; + + SPIN_WAIT(); + } + + while ((error = _rthread_rwlock_tryrwlock(rwlock)) == EBUSY) { + val = rwlock->value; + if (val == UNLOCKED) + continue; + new = val | WAITING; + atomic_inc_int(&rwlock->waiters); + if (atomic_cas_uint(&rwlock->value, val, new) == val) { + membar_enter_after_atomic(); + error = _twait(&rwlock->value, new, CLOCK_REALTIME, + abs); } + atomic_dec_int(&rwlock->waiters); + if (error == ETIMEDOUT) + break; } - _spinunlock(&lock->lock); return (error); } int -pthread_rwlock_wrlock(pthread_rwlock_t *lockp) +pthread_rwlock_trywrlock(pthread_rwlock_t *rwlockp) { - return (_rthread_rwlock_wrlock(lockp, NULL, 0)); + return (_rthread_rwlock_timedwrlock(rwlockp, 1, NULL, 0)); } int -pthread_rwlock_trywrlock(pthread_rwlock_t *lockp) +pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlockp, + const struct timespec *abs) { - return (_rthread_rwlock_wrlock(lockp, NULL, 1)); + return (_rthread_rwlock_timedwrlock(rwlockp, 0, abs, 1)); } int -pthread_rwlock_timedwrlock(pthread_rwlock_t *lockp, - const struct timespec *abstime) +pthread_rwlock_wrlock(pthread_rwlock_t *rwlockp) { - if (abstime == NULL || abstime->tv_nsec < 0 || - abstime->tv_nsec >= 1000000000) - return (EINVAL); - return (_rthread_rwlock_wrlock(lockp, abstime, 0)); + return (_rthread_rwlock_timedwrlock(rwlockp, 0, NULL, 0)); } - int -pthread_rwlock_unlock(pthread_rwlock_t *lockp) +pthread_rwlock_unlock(pthread_rwlock_t *rwlockp) { - pthread_rwlock_t lock; - pthread_t thread = pthread_self(); - pthread_t next; - int was_writer; - - lock = *lockp; - - _rthread_debug(5, "%p: rwlock_unlock %p\n", (void *)thread, - (void *)lock); - _spinlock(&lock->lock); - if (lock->owner != NULL) { - assert(lock->owner == thread); - was_writer = 1; - } else { - assert(lock->readers > 0); - lock->readers--; - if (lock->readers > 0) - goto out; - was_writer = 0; - } - - lock->owner = next = 
TAILQ_FIRST(&lock->writers); - if (next != NULL) { - /* dequeue and wake first writer */ - TAILQ_REMOVE(&lock->writers, next, waiting); - _spinunlock(&lock->lock); - __thrwakeup(next, 1); - return (0); - } - - /* could there have been blocked readers? wake them all */ - if (was_writer) - __thrwakeup(lock, 0); -out: - _spinunlock(&lock->lock); + pthread_t self = pthread_self(); + pthread_rwlock_t rwlock; + unsigned int val, new, waiters; + + rwlock = *rwlockp; + _rthread_debug(5, "%p: rwlock_unlock %p\n", self, (void *)rwlock); + + do { + val = rwlock->value; + waiters = rwlock->waiters; + if (COUNT(val) == WRITER || COUNT(val) == 1) + new = UNLOCKED; + else + new = val - 1; + } while (atomic_cas_uint(&rwlock->value, val, new) != val); + + membar_enter_after_atomic(); + if (waiters && new == UNLOCKED) + _wake(&rwlock->value, COUNT(val)); return (0); } Index: librthread/rthread_rwlock_compat.c =================================================================== RCS file: librthread/rthread_rwlock_compat.c diff -N librthread/rthread_rwlock_compat.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ librthread/rthread_rwlock_compat.c 23 Jan 2019 16:58:03 -0000 @@ -0,0 +1,260 @@ +/* $OpenBSD: rthread_rwlock.c,v 1.11 2018/04/24 16:28:42 pirofti Exp $ */ +/* + * Copyright (c) 2004,2005 Ted Unangst <t...@openbsd.org> + * Copyright (c) 2012 Philip Guenther <guent...@openbsd.org> + * All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * rwlocks + */ + +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include <pthread.h> + +#include "rthread.h" + +static _atomic_lock_t rwlock_init_lock = _SPINLOCK_UNLOCKED; + +int +pthread_rwlock_init(pthread_rwlock_t *lockp, + const pthread_rwlockattr_t *attrp __unused) +{ + pthread_rwlock_t lock; + + lock = calloc(1, sizeof(*lock)); + if (!lock) + return (errno); + lock->lock = _SPINLOCK_UNLOCKED; + TAILQ_INIT(&lock->writers); + + *lockp = lock; + + return (0); +} +DEF_STD(pthread_rwlock_init); + +int +pthread_rwlock_destroy(pthread_rwlock_t *lockp) +{ + pthread_rwlock_t lock; + + assert(lockp); + lock = *lockp; + if (lock) { + if (lock->readers || !TAILQ_EMPTY(&lock->writers)) { +#define MSG "pthread_rwlock_destroy on rwlock with waiters!\n" + write(2, MSG, sizeof(MSG) - 1); +#undef MSG + return (EBUSY); + } + free(lock); + } + *lockp = NULL; + + return (0); +} + +static int +_rthread_rwlock_ensure_init(pthread_rwlock_t *lockp) +{ + int ret = 0; + + /* + * If the rwlock is statically initialized, perform the dynamic + * initialization. 
+ */ + if (*lockp == NULL) + { + _spinlock(&rwlock_init_lock); + if (*lockp == NULL) + ret = pthread_rwlock_init(lockp, NULL); + _spinunlock(&rwlock_init_lock); + } + return (ret); +} + + +static int +_rthread_rwlock_rdlock(pthread_rwlock_t *lockp, const struct timespec *abstime, + int try) +{ + pthread_rwlock_t lock; + pthread_t thread = pthread_self(); + int error; + + if ((error = _rthread_rwlock_ensure_init(lockp))) + return (error); + + lock = *lockp; + _rthread_debug(5, "%p: rwlock_rdlock %p\n", (void *)thread, + (void *)lock); + _spinlock(&lock->lock); + + /* writers have precedence */ + if (lock->owner == NULL && TAILQ_EMPTY(&lock->writers)) + lock->readers++; + else if (try) + error = EBUSY; + else if (lock->owner == thread) + error = EDEADLK; + else { + do { + if (__thrsleep(lock, CLOCK_REALTIME, abstime, + &lock->lock, NULL) == EWOULDBLOCK) + return (ETIMEDOUT); + _spinlock(&lock->lock); + } while (lock->owner != NULL || !TAILQ_EMPTY(&lock->writers)); + lock->readers++; + } + _spinunlock(&lock->lock); + + return (error); +} + +int +pthread_rwlock_rdlock(pthread_rwlock_t *lockp) +{ + return (_rthread_rwlock_rdlock(lockp, NULL, 0)); +} + +int +pthread_rwlock_tryrdlock(pthread_rwlock_t *lockp) +{ + return (_rthread_rwlock_rdlock(lockp, NULL, 1)); +} + +int +pthread_rwlock_timedrdlock(pthread_rwlock_t *lockp, + const struct timespec *abstime) +{ + if (abstime == NULL || abstime->tv_nsec < 0 || + abstime->tv_nsec >= 1000000000) + return (EINVAL); + return (_rthread_rwlock_rdlock(lockp, abstime, 0)); +} + + +static int +_rthread_rwlock_wrlock(pthread_rwlock_t *lockp, const struct timespec *abstime, + int try) +{ + pthread_rwlock_t lock; + pthread_t thread = pthread_self(); + int error; + + if ((error = _rthread_rwlock_ensure_init(lockp))) + return (error); + + lock = *lockp; + + _rthread_debug(5, "%p: rwlock_timedwrlock %p\n", (void *)thread, + (void *)lock); + _spinlock(&lock->lock); + if (lock->readers == 0 && lock->owner == NULL) + lock->owner = thread; + 
else if (try) + error = EBUSY; + else if (lock->owner == thread) + error = EDEADLK; + else { + int do_wait; + + /* gotta block */ + TAILQ_INSERT_TAIL(&lock->writers, thread, waiting); + do { + do_wait = __thrsleep(thread, CLOCK_REALTIME, abstime, + &lock->lock, NULL) != EWOULDBLOCK; + _spinlock(&lock->lock); + } while (lock->owner != thread && do_wait); + + if (lock->owner != thread) { + /* timed out, sigh */ + TAILQ_REMOVE(&lock->writers, thread, waiting); + error = ETIMEDOUT; + } + } + _spinunlock(&lock->lock); + + return (error); +} + +int +pthread_rwlock_wrlock(pthread_rwlock_t *lockp) +{ + return (_rthread_rwlock_wrlock(lockp, NULL, 0)); +} + +int +pthread_rwlock_trywrlock(pthread_rwlock_t *lockp) +{ + return (_rthread_rwlock_wrlock(lockp, NULL, 1)); +} + +int +pthread_rwlock_timedwrlock(pthread_rwlock_t *lockp, + const struct timespec *abstime) +{ + if (abstime == NULL || abstime->tv_nsec < 0 || + abstime->tv_nsec >= 1000000000) + return (EINVAL); + return (_rthread_rwlock_wrlock(lockp, abstime, 0)); +} + + +int +pthread_rwlock_unlock(pthread_rwlock_t *lockp) +{ + pthread_rwlock_t lock; + pthread_t thread = pthread_self(); + pthread_t next; + int was_writer; + + lock = *lockp; + + _rthread_debug(5, "%p: rwlock_unlock %p\n", (void *)thread, + (void *)lock); + _spinlock(&lock->lock); + if (lock->owner != NULL) { + assert(lock->owner == thread); + was_writer = 1; + } else { + assert(lock->readers > 0); + lock->readers--; + if (lock->readers > 0) + goto out; + was_writer = 0; + } + + lock->owner = next = TAILQ_FIRST(&lock->writers); + if (next != NULL) { + /* dequeue and wake first writer */ + TAILQ_REMOVE(&lock->writers, next, waiting); + _spinunlock(&lock->lock); + __thrwakeup(next, 1); + return (0); + } + + /* could there have been blocked readers? wake them all */ + if (was_writer) + __thrwakeup(lock, 0); +out: + _spinunlock(&lock->lock); + + return (0); +}