On Fri, Sep 02, 2016 at 03:36:50PM +0200, Michal Mazurek wrote:
> Here is a working futex implementation for OpenBSD. This diff touches
> the kernel and librthread.
>
> * get rid of tickets from rthreads, they were getting in the way and are
> unused anyway
> * replace all struct _spinlock with int
> * use futexes instead of spinlocks everywhere within librthread
> * librthread no longer calls sched_yield(), nor does it spin
>
> Any comments?
The __sync_* atomic builtins this diff relies on are missing on arm, hppa
and superh, so it will break those architectures.
>
> Index: lib/librthread/Makefile
> ===================================================================
> RCS file: /cvs/src/lib/librthread/Makefile,v
> retrieving revision 1.43
> diff -u -p -r1.43 Makefile
> --- lib/librthread/Makefile 1 Jun 2016 04:34:18 -0000 1.43
> +++ lib/librthread/Makefile 2 Sep 2016 13:09:44 -0000
> @@ -18,7 +18,8 @@ CFLAGS+=-DNO_PIC
> VERSION_SCRIPT= ${.CURDIR}/Symbols.map
>
> .PATH: ${.CURDIR}/arch/${MACHINE_CPU}
> -SRCS= rthread.c \
> +SRCS= futex.c \
> + rthread.c \
> rthread_attr.c \
> rthread_barrier.c \
> rthread_barrier_attr.c \
> Index: lib/librthread/futex.c
> ===================================================================
> RCS file: lib/librthread/futex.c
> diff -N lib/librthread/futex.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ lib/librthread/futex.c 2 Sep 2016 13:09:44 -0000
> @@ -0,0 +1,41 @@
> +#include <sys/futex.h>
> +
> +#include <pthread.h>
> +#include "thread_private.h"
> +#include "rthread.h"
> +
> +inline int
> +futex_lock(volatile int *val)
> +{
> + int c;
> +
> + if ((c = __sync_val_compare_and_swap(val, 0, 1)) != 0) {
> + do {
> + if (c == 2 || __sync_val_compare_and_swap(val, 1, 2) != 0) {
> + futex(val, FUTEX_WAIT, 2, NULL, NULL, 0);
> + }
> + } while ((c = __sync_val_compare_and_swap(val, 0, 2)) != 0);
> + }
> +
> + return 0;
> +}
> +
> +inline int
> +futex_trylock(volatile int *val)
> +{
> + if ((__sync_val_compare_and_swap(val, 0, 1)) != 0)
> + return 1;
> +
> + return 0;
> +}
> +
> +inline int
> +futex_unlock(volatile int *val)
> +{
> + if (__sync_sub_and_fetch(val, 1) != 0) {
> + *val = 0;
> + futex(val, FUTEX_WAKE, 1, NULL, NULL, 0);
> + }
> +
> + return 0;
> +}
> Index: lib/librthread/rthread.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread.c,v
> retrieving revision 1.92
> diff -u -p -r1.92 rthread.c
> --- lib/librthread/rthread.c 1 Sep 2016 10:41:02 -0000 1.92
> +++ lib/librthread/rthread.c 2 Sep 2016 13:09:44 -0000
> @@ -63,15 +63,15 @@ REDIRECT_SYSCALL(thrkill);
>
> static int concurrency_level; /* not used */
>
> -struct _spinlock _SPINLOCK_UNLOCKED_ASSIGN = _SPINLOCK_UNLOCKED;
> +int _SPINLOCK_UNLOCKED_ASSIGN = _SPINLOCK_UNLOCKED;
>
> int _threads_ready;
> size_t _thread_pagesize;
> struct listhead _thread_list = LIST_HEAD_INITIALIZER(_thread_list);
> -struct _spinlock _thread_lock = _SPINLOCK_UNLOCKED;
> +int _thread_lock = 0;
> static struct pthread_queue _thread_gc_list
> = TAILQ_HEAD_INITIALIZER(_thread_gc_list);
> -static struct _spinlock _thread_gc_lock = _SPINLOCK_UNLOCKED;
> +int _thread_gc_lock = 0;
> static struct pthread _initial_thread;
>
> struct pthread_attr _rthread_attr_default = {
> @@ -88,23 +88,22 @@ struct pthread_attr _rthread_attr_defaul
> /*
> * internal support functions
> */
> -void
> -_spinlock(volatile struct _spinlock *lock)
> +inline void
> +_spinlock(volatile int *lock)
> {
> - while (_atomic_lock(&lock->ticket))
> - sched_yield();
> + futex_lock(lock);
> }
>
> -int
> -_spinlocktry(volatile struct _spinlock *lock)
> +inline int
> +_spinlocktry(volatile int *lock)
> {
> - return 0 == _atomic_lock(&lock->ticket);
> + return 0 == futex_trylock(lock);
> }
>
> -void
> -_spinunlock(volatile struct _spinlock *lock)
> +inline void
> +_spinunlock(volatile int *lock)
> {
> - lock->ticket = _ATOMIC_LOCK_UNLOCKED;
> + futex_unlock(lock);
> }
>
> static void
> @@ -643,7 +642,7 @@ _thread_dump_info(void)
> void
> _rthread_dl_lock(int what)
> {
> - static struct _spinlock lock = _SPINLOCK_UNLOCKED;
> + static int lock = _SPINLOCK_UNLOCKED;
> static pthread_t owner = NULL;
> static struct pthread_queue lockers = TAILQ_HEAD_INITIALIZER(lockers);
> static int count = 0;
> @@ -658,8 +657,7 @@ _rthread_dl_lock(int what)
> } else if (owner != self) {
> TAILQ_INSERT_TAIL(&lockers, self, waiting);
> while (owner != self) {
> - __thrsleep(self, 0 | _USING_TICKETS, NULL,
> - &lock.ticket, NULL);
> + __thrsleep(self, 0, NULL, &lock, NULL);
> _spinlock(&lock);
> }
> }
> Index: lib/librthread/rthread.h
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread.h,v
> retrieving revision 1.58
> diff -u -p -r1.58 rthread.h
> --- lib/librthread/rthread.h 7 May 2016 19:05:22 -0000 1.58
> +++ lib/librthread/rthread.h 2 Sep 2016 13:09:44 -0000
> @@ -37,18 +37,8 @@
> #define RTHREAD_STACK_SIZE_DEF (256 * 1024)
> #endif
>
> -#define _USING_TICKETS 0
> -/*
> - * tickets don't work yet? (or seem much slower, with lots of system time)
> - * until then, keep the struct around to avoid excessive changes going
> - * back and forth.
> - */
> -struct _spinlock {
> - _atomic_lock_t ticket;
> -};
> -
> -#define _SPINLOCK_UNLOCKED { _ATOMIC_LOCK_UNLOCKED }
> -extern struct _spinlock _SPINLOCK_UNLOCKED_ASSIGN;
> +#define _SPINLOCK_UNLOCKED _ATOMIC_LOCK_UNLOCKED
> +extern int _SPINLOCK_UNLOCKED_ASSIGN;
>
> struct stack {
> SLIST_ENTRY(stack) link; /* link for free default stacks */
> @@ -60,7 +50,7 @@ struct stack {
> };
>
> struct __sem {
> - struct _spinlock lock;
> + int lock;
> volatile int waitcount;
> volatile int value;
> int shared;
> @@ -69,7 +59,7 @@ struct __sem {
> TAILQ_HEAD(pthread_queue, pthread);
>
> struct pthread_mutex {
> - struct _spinlock lock;
> + int lock;
> struct pthread_queue lockers;
> int type;
> pthread_t owner;
> @@ -84,7 +74,7 @@ struct pthread_mutex_attr {
> };
>
> struct pthread_cond {
> - struct _spinlock lock;
> + int lock;
> struct pthread_queue waiters;
> struct pthread_mutex *mutex;
> clockid_t clock;
> @@ -95,7 +85,7 @@ struct pthread_cond_attr {
> };
>
> struct pthread_rwlock {
> - struct _spinlock lock;
> + int lock;
> pthread_t owner;
> struct pthread_queue writers;
> int readers;
> @@ -150,7 +140,7 @@ struct pthread_barrierattr {
> };
>
> struct pthread_spinlock {
> - struct _spinlock lock;
> + int lock;
> pthread_t owner;
> };
>
> @@ -158,7 +148,7 @@ struct tib;
> struct pthread {
> struct __sem donesem;
> unsigned int flags;
> - struct _spinlock flags_lock;
> + int flags_lock;
> struct tib *tib;
> void *retval;
> void *(*fn)(void *);
> @@ -192,9 +182,9 @@ struct pthread {
> (((size) + (_thread_pagesize - 1)) & ~(_thread_pagesize - 1))
>
> __BEGIN_HIDDEN_DECLS
> -void _spinlock(volatile struct _spinlock *);
> -int _spinlocktry(volatile struct _spinlock *);
> -void _spinunlock(volatile struct _spinlock *);
> +void _spinlock(volatile int *);
> +int _spinlocktry(volatile int *);
> +void _spinunlock(volatile int *);
> int _sem_wait(sem_t, int, const struct timespec *, int *);
> int _sem_post(sem_t);
>
> @@ -213,7 +203,7 @@ void _thread_malloc_reinit(void);
> extern int _threads_ready;
> extern size_t _thread_pagesize;
> extern LIST_HEAD(listhead, pthread) _thread_list;
> -extern struct _spinlock _thread_lock;
> +extern int _thread_lock;
> extern struct pthread_attr _rthread_attr_default;
> __END_HIDDEN_DECLS
>
> @@ -226,3 +216,8 @@ int __thrsleep(const volatile void *, cl
> volatile void *, const int *);
> int __thrwakeup(const volatile void *, int n);
> int __thrsigdivert(sigset_t, siginfo_t *, const struct timespec *);
> +
> +/* futex.c */
> +int futex_lock(volatile int *);
> +int futex_trylock(volatile int *);
> +int futex_unlock(volatile int *);
> Index: lib/librthread/rthread_file.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_file.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 rthread_file.c
> --- lib/librthread/rthread_file.c 7 May 2016 19:05:22 -0000 1.8
> +++ lib/librthread/rthread_file.c 2 Sep 2016 13:09:44 -0000
> @@ -87,7 +87,7 @@ static struct static_file_lock {
> } flh[NUM_HEADS];
>
> /* Lock for accesses to the hash table: */
> -static struct _spinlock hash_lock = _SPINLOCK_UNLOCKED;
> +static int hash_lock = _SPINLOCK_UNLOCKED;
>
> /*
> * Find a lock structure for a FILE, return NULL if the file is
> @@ -205,8 +205,7 @@ _thread_flockfile(FILE * fp)
> */
> TAILQ_INSERT_TAIL(&p->lockers,self,waiting);
> while (p->owner != self) {
> - __thrsleep(self, 0 | _USING_TICKETS, NULL,
> - &hash_lock.ticket, NULL);
> + __thrsleep(self, 0, NULL, &hash_lock, NULL);
> _spinlock(&hash_lock);
> }
> }
> Index: lib/librthread/rthread_libc.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_libc.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 rthread_libc.c
> --- lib/librthread/rthread_libc.c 1 Sep 2016 10:56:46 -0000 1.16
> +++ lib/librthread/rthread_libc.c 2 Sep 2016 13:09:44 -0000
> @@ -201,7 +201,7 @@ _thread_malloc_reinit(void)
> /*
> * atexit lock
> */
> -static struct _spinlock atexit_lock = _SPINLOCK_UNLOCKED;
> +int atexit_lock = 0;
>
> void
> _thread_atexit_lock(void)
> @@ -218,7 +218,7 @@ _thread_atexit_unlock(void)
> /*
> * atfork lock
> */
> -static struct _spinlock atfork_lock = _SPINLOCK_UNLOCKED;
> +int atfork_lock = 0;
>
> void
> _thread_atfork_lock(void)
> @@ -235,7 +235,7 @@ _thread_atfork_unlock(void)
> /*
> * arc4random lock
> */
> -static struct _spinlock arc4_lock = _SPINLOCK_UNLOCKED;
> +int arc4_lock = 0;
>
> void
> _thread_arc4_lock(void)
> Index: lib/librthread/rthread_rwlock.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_rwlock.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 rthread_rwlock.c
> --- lib/librthread/rthread_rwlock.c 2 Apr 2016 19:56:53 -0000 1.6
> +++ lib/librthread/rthread_rwlock.c 2 Sep 2016 13:09:45 -0000
> @@ -31,7 +31,7 @@
> #include "rthread.h"
>
>
> -static struct _spinlock rwlock_init_lock = _SPINLOCK_UNLOCKED;
> +int rwlock_init_lock = 0;
>
> int
> pthread_rwlock_init(pthread_rwlock_t *lockp,
> @@ -117,8 +117,8 @@ _rthread_rwlock_rdlock(pthread_rwlock_t
> error = EDEADLK;
> else {
> do {
> - if (__thrsleep(lock, CLOCK_REALTIME | _USING_TICKETS,
> - abstime, &lock->lock.ticket, NULL) == EWOULDBLOCK)
> + if (__thrsleep(lock, CLOCK_REALTIME,
> + abstime, &lock->lock, NULL) == EWOULDBLOCK)
> return (ETIMEDOUT);
> _spinlock(&lock->lock);
> } while (lock->owner != NULL || !TAILQ_EMPTY(&lock->writers));
> @@ -180,9 +180,8 @@ _rthread_rwlock_wrlock(pthread_rwlock_t
> /* gotta block */
> TAILQ_INSERT_TAIL(&lock->writers, thread, waiting);
> do {
> - do_wait = __thrsleep(thread, CLOCK_REALTIME |
> - _USING_TICKETS, abstime,
> - &lock->lock.ticket, NULL) != EWOULDBLOCK;
> + do_wait = __thrsleep(thread, CLOCK_REALTIME, abstime,
> + &lock->lock, NULL) != EWOULDBLOCK;
> _spinlock(&lock->lock);
> } while (lock->owner != thread && do_wait);
>
> Index: lib/librthread/rthread_sem.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_sem.c,v
> retrieving revision 1.23
> diff -u -p -r1.23 rthread_sem.c
> --- lib/librthread/rthread_sem.c 7 May 2016 19:05:22 -0000 1.23
> +++ lib/librthread/rthread_sem.c 2 Sep 2016 13:09:45 -0000
> @@ -71,9 +71,8 @@ _sem_wait(sem_t sem, int tryonly, const
> } else {
> sem->waitcount++;
> do {
> - r = __thrsleep(ident, CLOCK_REALTIME |
> - _USING_TICKETS, abstime, &sem->lock.ticket,
> - delayed_cancel);
> + r = __thrsleep(ident, CLOCK_REALTIME, abstime,
> + &sem->lock, delayed_cancel);
> _spinlock(&sem->lock);
> /* ignore interruptions other than cancelation */
> if (r == EINTR && (delayed_cancel == NULL ||
> Index: lib/librthread/rthread_stack.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_stack.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 rthread_stack.c
> --- lib/librthread/rthread_stack.c 1 Sep 2016 10:56:46 -0000 1.15
> +++ lib/librthread/rthread_stack.c 2 Sep 2016 13:09:45 -0000
> @@ -18,7 +18,7 @@
> * attributes for possible reuse.
> */
> static SLIST_HEAD(, stack) def_stacks = SLIST_HEAD_INITIALIZER(head);
> -static struct _spinlock def_stacks_lock = _SPINLOCK_UNLOCKED;
> +int def_stacks_lock = 0;
>
> struct stack *
> _rthread_alloc_stack(pthread_t thread)
> Index: lib/librthread/rthread_sync.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_sync.c,v
> retrieving revision 1.42
> diff -u -p -r1.42 rthread_sync.c
> --- lib/librthread/rthread_sync.c 7 May 2016 19:05:22 -0000 1.42
> +++ lib/librthread/rthread_sync.c 2 Sep 2016 13:09:45 -0000
> @@ -32,7 +32,7 @@
> #include "rthread.h"
> #include "cancel.h" /* in libc/include */
>
> -static struct _spinlock static_init_lock = _SPINLOCK_UNLOCKED;
> +int static_init_lock = 0;
>
> /*
> * mutexen
> @@ -130,9 +130,8 @@ _rthread_mutex_lock(pthread_mutex_t *mut
> abort();
>
> /* self-deadlock, possibly until timeout */
> - while (__thrsleep(self, CLOCK_REALTIME |
> - _USING_TICKETS, abstime,
> - &mutex->lock.ticket, NULL) != EWOULDBLOCK)
> + while (__thrsleep(self, CLOCK_REALTIME, abstime,
> + &mutex->lock, NULL) != EWOULDBLOCK)
> _spinlock(&mutex->lock);
> return (ETIMEDOUT);
> }
> @@ -148,8 +147,8 @@ _rthread_mutex_lock(pthread_mutex_t *mut
> /* add to the wait queue and block until at the head */
> TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting);
> while (mutex->owner != self) {
> - ret = __thrsleep(self, CLOCK_REALTIME | _USING_TICKETS,
> - abstime, &mutex->lock.ticket, NULL);
> + ret = __thrsleep(self, CLOCK_REALTIME, abstime,
> + &mutex->lock, NULL);
> _spinlock(&mutex->lock);
> assert(mutex->owner != NULL);
> if (ret == EWOULDBLOCK) {
> @@ -360,8 +359,8 @@ pthread_cond_timedwait(pthread_cond_t *c
>
> /* wait until we're the owner of the mutex again */
> while (mutex->owner != self) {
> - error = __thrsleep(self, cond->clock | _USING_TICKETS, abstime,
> - &mutex->lock.ticket, &self->delayed_cancel);
> + error = __thrsleep(self, cond->clock, abstime,
> + &mutex->lock, &self->delayed_cancel);
>
> /*
> * If abstime == NULL, then we're definitely waiting
> @@ -510,8 +509,8 @@ pthread_cond_wait(pthread_cond_t *condp,
>
> /* wait until we're the owner of the mutex again */
> while (mutex->owner != self) {
> - error = __thrsleep(self, 0 | _USING_TICKETS, NULL,
> - &mutex->lock.ticket, &self->delayed_cancel);
> + error = __thrsleep(self, 0, NULL, &mutex->lock,
> + &self->delayed_cancel);
>
> /*
> * If we took a normal signal (not from
> Index: lib/librthread/rthread_tls.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_tls.c,v
> retrieving revision 1.17
> diff -u -p -r1.17 rthread_tls.c
> --- lib/librthread/rthread_tls.c 2 Apr 2016 19:56:53 -0000 1.17
> +++ lib/librthread/rthread_tls.c 2 Sep 2016 13:09:45 -0000
> @@ -27,7 +27,7 @@
> #include "rthread.h"
>
> static struct rthread_key rkeys[PTHREAD_KEYS_MAX];
> -static struct _spinlock rkeyslock = _SPINLOCK_UNLOCKED;
> +int rkeyslock = 0;
>
> int
> pthread_key_create(pthread_key_t *key, void (*destructor)(void*))
> Index: sys/conf/files
> ===================================================================
> RCS file: /cvs/src/sys/conf/files,v
> retrieving revision 1.625
> diff -u -p -r1.625 files
> --- sys/conf/files 1 Sep 2016 10:06:33 -0000 1.625
> +++ sys/conf/files 2 Sep 2016 13:09:47 -0000
> @@ -660,6 +660,7 @@ file kern/kern_event.c
> file kern/kern_exec.c
> file kern/kern_exit.c
> file kern/kern_fork.c
> +file kern/kern_futex.c
> file kern/kern_kthread.c
> file kern/kern_ktrace.c ktrace
> file kern/kern_lock.c
> Index: sys/kern/init_sysent.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 init_sysent.c
> --- sys/kern/init_sysent.c 27 Jun 2016 16:52:01 -0000 1.184
> +++ sys/kern/init_sysent.c 2 Sep 2016 13:09:48 -0000
> @@ -751,5 +751,7 @@ struct sysent sysent[] = {
> sys___set_tcb }, /* 329 = __set_tcb */
> { 0, 0, SY_NOLOCK | 0,
> sys___get_tcb }, /* 330 = __get_tcb */
> + { 6, s(struct sys_futex_args), 0,
> + sys_futex }, /* 331 = futex */
> };
>
> Index: sys/kern/kern_futex.c
> ===================================================================
> RCS file: sys/kern/kern_futex.c
> diff -N sys/kern/kern_futex.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/kern/kern_futex.c 2 Sep 2016 13:09:49 -0000
> @@ -0,0 +1,143 @@
> +/* $OpenBSD$ */
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +
> +#include <sys/signal.h>
> +#include <sys/mount.h>
> +#include <sys/socket.h>
> +#include <sys/syscallargs.h>
> +#include <sys/proc.h>
> +#include <sys/futex.h>
> +#include <sys/param.h>
> +#include <sys/ktrace.h>
> +#include <sys/kernel.h>
> +
> +int
> +futex_wait(struct proc *p, struct sys_futex_args *v)
> +{
> + struct sys_futex_args /* {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> + } */ *uap = v;
> + void *uaddr = SCARG(uap, uaddr);
> + int val = SCARG(uap, val);
> + int futex_op = SCARG(uap, futex_op);
> + uint64_t to_ticks = 0;
> + int uval;
> + int error;
> + int abort;
> +
> + if ((error = copyin(uaddr, &uval, sizeof uval)))
> + return error;
> + if (val != uval)
> + return EWOULDBLOCK;
> + if (SCARG(uap, tp) != NULL) {
> + struct timespec now;
> + struct timespec tsp;
> + int clock_id;
> +
> + if ((error = copyin(SCARG(uap, tp), &tsp, sizeof(tsp))))
> + return error;
> +
> + clock_id = futex_op & FUTEX_CLOCK_REALTIME ?
> + CLOCK_REALTIME : CLOCK_MONOTONIC;
> +
> + clock_id = CLOCK_MONOTONIC;
> +
> + if ((error = clock_gettime(p, clock_id, &now)))
> + return error;
> +
> +#ifdef KTRACE
> + if (KTRPOINT(p, KTR_STRUCT))
> + ktrabstimespec(p, &tsp);
> +#endif
> +
> + if (timespeccmp(&tsp, &now, <))
> + return EWOULDBLOCK;
> +
> + timespecsub(&tsp, &now, &tsp);
> + to_ticks = (uint64_t)hz * tsp.tv_sec +
> + (tsp.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
> + if (to_ticks > INT_MAX)
> + to_ticks = INT_MAX;
> + }
> +
> + /* XXX emulate 'abort' from thrsleep */
> + if (SCARG(uap, uaddr2) != NULL) {
> + if ((error = copyin(SCARG(uap, uaddr2), &abort,
> + sizeof(abort))) != 0)
> + return error;
> + if (abort)
> + return EINTR;
> + }
> +
> + p->p_futslpid = (long)uaddr;
> + error = tsleep(&p->p_futslpid, PUSER | PCATCH, "futex", (int)to_ticks);
> +
> + if (error == ERESTART)
> + error = EINTR;
> +
> + return error;
> +}
> +
> +int
> +futex_wake(struct proc *p, struct sys_futex_args *v)
> +{
> + struct sys_futex_args /* {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> + } */ *uap = v;
> + void *uaddr = SCARG(uap, uaddr);
> + int n = SCARG(uap, val);
> + int found = 0;
> + struct proc *q;
> +
> + TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
> + if (q->p_futslpid == (long)uaddr) {
> + wakeup_one(&q->p_futslpid);
> + q->p_futslpid = 0;
> + if (++found == n)
> + break;
> + }
> + }
> +
> + return found ? 0 : ESRCH;
> +}
> +
> +int
> +sys_futex(struct proc *p, void *v, register_t *retval)
> +{
> + struct sys_futex_args /* {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> + } */ *uap = v;
> + int op = SCARG(uap, futex_op);
> +
> + *retval = 0;
> +
> + switch (op) {
> + case FUTEX_WAIT:
> + *retval = futex_wait(p, uap);
> + break;
> + case FUTEX_WAKE:
> + *retval = futex_wake(p, uap);
> + break;
> + default:
> + *retval = EINVAL;
> + break;
> + }
> + return 0;
> +}
> Index: sys/kern/kern_pledge.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.181
> diff -u -p -r1.181 kern_pledge.c
> --- sys/kern/kern_pledge.c 31 Aug 2016 07:22:43 -0000 1.181
> +++ sys/kern/kern_pledge.c 2 Sep 2016 13:09:50 -0000
> @@ -260,6 +260,7 @@ const uint64_t pledge_syscalls[SYS_MAXSY
> [SYS___tfork] = PLEDGE_STDIO,
> [SYS_sched_yield] = PLEDGE_STDIO,
> [SYS___thrsleep] = PLEDGE_STDIO,
> + [SYS_futex] = PLEDGE_ALWAYS,
> [SYS___thrwakeup] = PLEDGE_STDIO,
> [SYS___threxit] = PLEDGE_STDIO,
> [SYS___thrsigdivert] = PLEDGE_STDIO,
> Index: sys/kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 kern_synch.c
> --- sys/kern/kern_synch.c 6 Jul 2016 15:53:01 -0000 1.133
> +++ sys/kern/kern_synch.c 2 Sep 2016 13:09:51 -0000
> @@ -59,7 +59,7 @@
> #endif
>
> int thrsleep(struct proc *, struct sys___thrsleep_args *);
> -int thrsleep_unlock(void *, int);
> +int thrsleep_unlock(struct proc *, void *, int);
>
> /*
> * We're only looking at 7 bits of the address; everything is
> @@ -454,25 +454,27 @@ sys_sched_yield(struct proc *p, void *v,
> }
>
> int
> -thrsleep_unlock(void *lock, int lockflags)
> +thrsleep_unlock(struct proc *p, void *lock, int lockflags)
> {
> static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
> _atomic_lock_t *atomiclock = lock;
> - uint32_t *ticket = lock;
> - uint32_t ticketvalue;
> int error;
>
> if (!lock)
> return (0);
>
> - if (lockflags) {
> - if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
> - return (error);
> - ticketvalue++;
> - error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
> - } else {
> - error = copyout(&unlocked, atomiclock, sizeof(unlocked));
> + error = copyout(&unlocked, atomiclock, sizeof(unlocked));
> +
> + struct proc *q;
> +
> + TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
> + if (q->p_futslpid == (long)lock) {
> + wakeup_one(&q->p_futslpid);
> + q->p_futslpid = 0;
> + break;
> + }
> }
> +
> return (error);
> }
>
> @@ -510,7 +512,7 @@ thrsleep(struct proc *p, struct sys___th
>
> if (timespeccmp(tsp, &now, <)) {
> /* already passed: still do the unlock */
> - if ((error = thrsleep_unlock(lock, lockflags)))
> + if ((error = thrsleep_unlock(p, lock, lockflags)))
> return (error);
> return (EWOULDBLOCK);
> }
> @@ -524,7 +526,7 @@ thrsleep(struct proc *p, struct sys___th
>
> p->p_thrslpid = ident;
>
> - if ((error = thrsleep_unlock(lock, lockflags))) {
> + if ((error = thrsleep_unlock(p, lock, lockflags))) {
> goto out;
> }
>
> Index: sys/kern/syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.183
> diff -u -p -r1.183 syscalls.c
> --- sys/kern/syscalls.c 27 Jun 2016 16:52:01 -0000 1.183
> +++ sys/kern/syscalls.c 2 Sep 2016 13:09:54 -0000
> @@ -393,4 +393,5 @@ char *syscallnames[] = {
> "#328 (obsolete __tfork51)", /* 328 = obsolete __tfork51 */
> "__set_tcb", /* 329 = __set_tcb */
> "__get_tcb", /* 330 = __get_tcb */
> + "futex", /* 331 = futex */
> };
> Index: sys/kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.173
> diff -u -p -r1.173 syscalls.master
> --- sys/kern/syscalls.master 27 Jun 2016 16:50:07 -0000 1.173
> +++ sys/kern/syscalls.master 2 Sep 2016 13:09:54 -0000
> @@ -563,3 +563,6 @@
> 328 OBSOL __tfork51
> 329 STD NOLOCK { void sys___set_tcb(void *tcb); }
> 330 STD NOLOCK { void *sys___get_tcb(void); }
> +331 STD { void sys_futex(void *uaddr, int futex_op, int val, \
> + const struct timespec *tp, int *uaddr2, \
> + int val3); }
> Index: sys/sys/futex.h
> ===================================================================
> RCS file: sys/sys/futex.h
> diff -N sys/sys/futex.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/sys/futex.h 2 Sep 2016 13:09:59 -0000
> @@ -0,0 +1,17 @@
> +/* $OpenBSD$ */
> +
> +#ifndef _SYS_FUTEX_H_
> +#define _SYS_FUTEX_H_
> +
> +#include <sys/types.h>
> +
> +#define FUTEX_WAIT 0
> +#define FUTEX_WAKE 1
> +
> +#define FUTEX_CLOCK_REALTIME 256
> +
> +__BEGIN_DECLS
> +int futex(volatile void *, int, int, const struct timespec *, int *, int);
> +__END_DECLS
> +
> +#endif /* !_SYS_FUTEX_H_ */
> Index: sys/sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.224
> diff -u -p -r1.224 proc.h
> --- sys/sys/proc.h 27 Jun 2016 19:55:02 -0000 1.224
> +++ sys/sys/proc.h 2 Sep 2016 13:10:00 -0000
> @@ -297,6 +297,7 @@ struct proc {
> int p_dupfd; /* Sideways return value from filedescopen. XXX */
>
> long p_thrslpid; /* for thrsleep syscall */
> + long p_futslpid; /* for futex syscall */
>
> /* scheduling */
> u_int p_estcpu; /* Time averaged value of p_cpticks. */
> Index: sys/sys/syscall.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.182
> diff -u -p -r1.182 syscall.h
> --- sys/sys/syscall.h 27 Jun 2016 16:52:01 -0000 1.182
> +++ sys/sys/syscall.h 2 Sep 2016 13:10:01 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: syscall.h,v 1.182 2016/06/27 16:52:01 jsing Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call numbers.
> @@ -702,4 +702,7 @@
> /* syscall: "__get_tcb" ret: "void *" args: */
> #define SYS___get_tcb 330
>
> -#define SYS_MAXSYSCALL 331
> +/* syscall: "futex" ret: "void" args: "void *" "int" "int" "const struct timespec *" "int *" "int" */
> +#define SYS_futex 331
> +
> +#define SYS_MAXSYSCALL 332
> Index: sys/sys/syscallargs.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.185
> diff -u -p -r1.185 syscallargs.h
> --- sys/sys/syscallargs.h 27 Jun 2016 16:52:01 -0000 1.185
> +++ sys/sys/syscallargs.h 2 Sep 2016 13:10:02 -0000
> @@ -1,4 +1,4 @@
> -/* $OpenBSD: syscallargs.h,v 1.185 2016/06/27 16:52:01 jsing Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call argument lists.
> @@ -1098,6 +1098,15 @@ struct sys___set_tcb_args {
> syscallarg(void *) tcb;
> };
>
> +struct sys_futex_args {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> +};
> +
> /*
> * System call prototypes.
> */
> @@ -1347,3 +1356,4 @@ int sys_symlinkat(struct proc *, void *,
> int sys_unlinkat(struct proc *, void *, register_t *);
> int sys___set_tcb(struct proc *, void *, register_t *);
> int sys___get_tcb(struct proc *, void *, register_t *);
> +int sys_futex(struct proc *, void *, register_t *);
>
> --
> Michal Mazurek
>