> Date: Sun, 4 Sep 2016 15:17:21 +0200
> From: Michal Mazurek <[email protected]>
>
> Resending a diff that works with the current tree.
>
> Comments?
I believe it was said before, but this can't go in since it breaks
architectures that don't implement the __sync_xxx primitives.
> Index: sys/conf/files
> ===================================================================
> RCS file: /cvs/src/sys/conf/files,v
> retrieving revision 1.627
> diff -u -p -r1.627 files
> --- sys/conf/files 4 Sep 2016 09:22:28 -0000 1.627
> +++ sys/conf/files 4 Sep 2016 13:12:39 -0000
> @@ -661,6 +661,7 @@ file kern/kern_event.c
> file kern/kern_exec.c
> file kern/kern_exit.c
> file kern/kern_fork.c
> +file kern/kern_futex.c
> file kern/kern_kthread.c
> file kern/kern_ktrace.c ktrace
> file kern/kern_lock.c
> Index: sys/kern/init_sysent.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 init_sysent.c
> --- sys/kern/init_sysent.c 27 Jun 2016 16:52:01 -0000 1.184
> +++ sys/kern/init_sysent.c 4 Sep 2016 13:12:40 -0000
> @@ -751,5 +751,7 @@ struct sysent sysent[] = {
> sys___set_tcb }, /* 329 = __set_tcb */
> { 0, 0, SY_NOLOCK | 0,
> sys___get_tcb }, /* 330 = __get_tcb */
> + { 6, s(struct sys_futex_args), 0,
> + sys_futex }, /* 331 = futex */
> };
>
> Index: sys/kern/kern_futex.c
> ===================================================================
> RCS file: sys/kern/kern_futex.c
> diff -N sys/kern/kern_futex.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/kern/kern_futex.c 4 Sep 2016 13:12:40 -0000
> @@ -0,0 +1,148 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2016
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/param.h>
> +#include <sys/systm.h>
> +
> +#include <sys/signal.h>
> +#include <sys/mount.h>
> +#include <sys/socket.h>
> +#include <sys/syscallargs.h>
> +#include <sys/proc.h>
> +#include <sys/futex.h>
> +#include <sys/param.h>
> +#include <sys/ktrace.h>
> +#include <sys/kernel.h>
> +
> +int /* FUTEX_WAIT: sleep while *uaddr == val; returns 0 or an errno value */
> +futex_wait(struct proc *p, struct sys_futex_args *v)
> +{
> + struct sys_futex_args /* {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> + } */ *uap = v;
> + void *uaddr = SCARG(uap, uaddr);
> + int val = SCARG(uap, val);
> + int futex_op = SCARG(uap, futex_op);
> + uint64_t to_ticks = 0; /* stays 0 when no timespec is supplied */
> + int uval;
> + int error;
> +
> + if ((error = copyin(uaddr, &uval, sizeof uval))) /* read the futex word from userland */
> + return error;
> + if (val != uval) /* word no longer holds the expected value: do not sleep */
> + return EWOULDBLOCK;
> + if (SCARG(uap, tp) != NULL) {
> + struct timespec now;
> + struct timespec tsp; /* treated as an absolute deadline: compared with "now" below */
> + int clock_id;
> +
> + if ((error = copyin(SCARG(uap, tp), &tsp, sizeof(tsp))))
> + return error;
> +
> + clock_id = futex_op & FUTEX_CLOCK_REALTIME ?
> + CLOCK_REALTIME : CLOCK_MONOTONIC;
> +
> + clock_id = CLOCK_MONOTONIC; /* NOTE(review): overrides the selection just above, so the flag has no effect */
> +
> + if ((error = clock_gettime(p, clock_id, &now)))
> + return error;
> +
> +#ifdef KTRACE
> + if (KTRPOINT(p, KTR_STRUCT))
> + ktrabstimespec(p, &tsp);
> +#endif
> +
> + if (timespeccmp(&tsp, &now, <)) /* deadline already in the past */
> + return EWOULDBLOCK;
> +
> + timespecsub(&tsp, &now, &tsp); /* tsp becomes the remaining interval */
> + to_ticks = (uint64_t)hz * tsp.tv_sec +
> + (tsp.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1; /* nsec part rounded up to ticks, plus one extra tick */
> + if (to_ticks > INT_MAX) /* clamp: the value is cast to int for tsleep() */
> + to_ticks = INT_MAX;
> + }
> +
> + p->p_futslpid = (long)uaddr; /* record the address so futex_wake() can match this thread */
> + error = tsleep(&p->p_futslpid, PUSER | PCATCH, "futex", (int)to_ticks); /* sleep channel is this thread's own field */
> +
> + if (error == ERESTART) /* hand ERESTART back to the caller as EINTR */
> + error = EINTR;
> +
> + return error; /* NOTE(review): p_futslpid is not cleared here; in this patch only the waker resets it */
> +}
> +
> +int /* FUTEX_WAKE: wake threads of the calling process recorded as waiting on uaddr */
> +futex_wake(struct proc *p, struct sys_futex_args *v)
> +{
> + struct sys_futex_args /* {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> + } */ *uap = v;
> + void *uaddr = SCARG(uap, uaddr);
> + int n = SCARG(uap, val); /* number of waiters after which the scan stops */
> + int found = 0; /* waiters matched so far */
> + struct proc *q;
> +
> + TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) { /* only the caller's own process is scanned */
> + if (q->p_futslpid == (long)uaddr) {
> + wakeup_one(&q->p_futslpid); /* each waiter sleeps on its own p_futslpid */
> + q->p_futslpid = 0; /* prevent a second match on this thread */
> + if (++found == n) /* NOTE(review): never true for n <= 0, so every match is woken */
> + break;
> + }
> + }
> +
> + return found ? 0 : ESRCH; /* ESRCH when no thread was recorded on uaddr */
> +}
> +
> +int /* futex syscall entry: dispatches on futex_op; this function itself always returns 0 */
> +sys_futex(struct proc *p, void *v, register_t *retval)
> +{
> + struct sys_futex_args /* {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> + } */ *uap = v;
> + int op = SCARG(uap, futex_op); /* NOTE(review): flag bits are not masked, so FUTEX_WAIT|FUTEX_CLOCK_REALTIME hits default */
> +
> + *retval = 0;
> +
> + switch (op) {
> + case FUTEX_WAIT:
> + *retval = futex_wait(p, uap); /* the errno value is delivered through *retval */
> + break;
> + case FUTEX_WAKE:
> + *retval = futex_wake(p, uap);
> + break;
> + default:
> + *retval = EINVAL; /* unsupported operation */
> + break;
> + }
> + return 0; /* failures are reported via *retval, not via the return value */
> +}
> Index: sys/kern/kern_pledge.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.181
> diff -u -p -r1.181 kern_pledge.c
> --- sys/kern/kern_pledge.c 31 Aug 2016 07:22:43 -0000 1.181
> +++ sys/kern/kern_pledge.c 4 Sep 2016 13:12:40 -0000
> @@ -260,6 +260,7 @@ const uint64_t pledge_syscalls[SYS_MAXSY
> [SYS___tfork] = PLEDGE_STDIO,
> [SYS_sched_yield] = PLEDGE_STDIO,
> [SYS___thrsleep] = PLEDGE_STDIO,
> + [SYS_futex] = PLEDGE_ALWAYS,
> [SYS___thrwakeup] = PLEDGE_STDIO,
> [SYS___threxit] = PLEDGE_STDIO,
> [SYS___thrsigdivert] = PLEDGE_STDIO,
> Index: sys/kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.134
> diff -u -p -r1.134 kern_synch.c
> --- sys/kern/kern_synch.c 3 Sep 2016 15:06:06 -0000 1.134
> +++ sys/kern/kern_synch.c 4 Sep 2016 13:12:40 -0000
> @@ -59,7 +59,7 @@
> #endif
>
> int thrsleep(struct proc *, struct sys___thrsleep_args *);
> -int thrsleep_unlock(void *);
> +int thrsleep_unlock(struct proc *, void *);
>
> /*
> * We're only looking at 7 bits of the address; everything is
> @@ -454,15 +454,27 @@ sys_sched_yield(struct proc *p, void *v,
> }
>
> int
> -thrsleep_unlock(void *lock)
> +thrsleep_unlock(struct proc *p, void *lock)
> {
> static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
> _atomic_lock_t *atomiclock = lock;
> + struct proc *q;
> + int error;
>
> if (!lock)
> return 0;
>
> - return copyout(&unlocked, atomiclock, sizeof(unlocked));
> + error = copyout(&unlocked, atomiclock, sizeof(unlocked));
> +
> + TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
> + if (q->p_futslpid == (long)lock) {
> + wakeup_one(&q->p_futslpid);
> + q->p_futslpid = 0;
> + break;
> + }
> + }
> +
> + return error;
> }
>
> static int globalsleepaddr;
> @@ -498,7 +510,7 @@ thrsleep(struct proc *p, struct sys___th
>
> if (timespeccmp(tsp, &now, <)) {
> /* already passed: still do the unlock */
> - if ((error = thrsleep_unlock(lock)))
> + if ((error = thrsleep_unlock(p, lock)))
> return (error);
> return (EWOULDBLOCK);
> }
> @@ -512,7 +524,7 @@ thrsleep(struct proc *p, struct sys___th
>
> p->p_thrslpid = ident;
>
> - if ((error = thrsleep_unlock(lock)))
> + if ((error = thrsleep_unlock(p, lock)))
> goto out;
>
> if (SCARG(uap, abort) != NULL) {
> Index: sys/kern/syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.183
> diff -u -p -r1.183 syscalls.c
> --- sys/kern/syscalls.c 27 Jun 2016 16:52:01 -0000 1.183
> +++ sys/kern/syscalls.c 4 Sep 2016 13:12:40 -0000
> @@ -393,4 +393,5 @@ char *syscallnames[] = {
> "#328 (obsolete __tfork51)", /* 328 = obsolete __tfork51 */
> "__set_tcb", /* 329 = __set_tcb */
> "__get_tcb", /* 330 = __get_tcb */
> + "futex", /* 331 = futex */
> };
> Index: sys/kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.173
> diff -u -p -r1.173 syscalls.master
> --- sys/kern/syscalls.master 27 Jun 2016 16:50:07 -0000 1.173
> +++ sys/kern/syscalls.master 4 Sep 2016 13:12:40 -0000
> @@ -563,3 +563,6 @@
> 328 OBSOL __tfork51
> 329 STD NOLOCK { void sys___set_tcb(void *tcb); }
> 330 STD NOLOCK { void *sys___get_tcb(void); }
> +331 STD { void sys_futex(void *uaddr, int futex_op, int val, \
> + const struct timespec *tp, int *uaddr2, \
> + int val3); }
> Index: sys/sys/futex.h
> ===================================================================
> RCS file: sys/sys/futex.h
> diff -N sys/sys/futex.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ sys/sys/futex.h 4 Sep 2016 13:12:40 -0000
> @@ -0,0 +1,32 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2016
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _SYS_FUTEX_H_
> +#define _SYS_FUTEX_H_
> +
> +#include <sys/types.h>
> +
> +#define FUTEX_WAIT 0
> +#define FUTEX_WAKE 1
> +
> +#define FUTEX_CLOCK_REALTIME 256
> +
> +__BEGIN_DECLS
> +int futex(volatile void *, int, int, const struct timespec *, int *, int);
> +__END_DECLS
> +
> +#endif /* !_SYS_FUTEX_H_ */
> Index: sys/sys/proc.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.226
> diff -u -p -r1.226 proc.h
> --- sys/sys/proc.h 3 Sep 2016 08:47:24 -0000 1.226
> +++ sys/sys/proc.h 4 Sep 2016 13:12:40 -0000
> @@ -302,6 +302,7 @@ struct proc {
> int p_dupfd; /* Sideways return value from filedescopen. XXX */
>
> long p_thrslpid; /* for thrsleep syscall */
> + long p_futslpid; /* for futex syscall */
>
> /* scheduling */
> u_int p_estcpu; /* Time averaged value of p_cpticks. */
> Index: sys/sys/syscall.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.182
> diff -u -p -r1.182 syscall.h
> --- sys/sys/syscall.h 27 Jun 2016 16:52:01 -0000 1.182
> +++ sys/sys/syscall.h 4 Sep 2016 13:12:40 -0000
> @@ -702,4 +702,7 @@
> /* syscall: "__get_tcb" ret: "void *" args: */
> #define SYS___get_tcb 330
>
> -#define SYS_MAXSYSCALL 331
> +/* syscall: "futex" ret: "void" args: "void *" "int" "int" "const struct timespec *" "int *" "int" */
> +#define SYS_futex 331
> +
> +#define SYS_MAXSYSCALL 332
> Index: sys/sys/syscallargs.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.185
> diff -u -p -r1.185 syscallargs.h
> --- sys/sys/syscallargs.h 27 Jun 2016 16:52:01 -0000 1.185
> +++ sys/sys/syscallargs.h 4 Sep 2016 13:12:40 -0000
> @@ -1098,6 +1098,15 @@ struct sys___set_tcb_args {
> syscallarg(void *) tcb;
> };
>
> +struct sys_futex_args {
> + syscallarg(void *) uaddr;
> + syscallarg(int) futex_op;
> + syscallarg(int) val;
> + syscallarg(const struct timespec *) tp;
> + syscallarg(int *) uaddr2;
> + syscallarg(int) val3;
> +};
> +
> /*
> * System call prototypes.
> */
> @@ -1347,3 +1356,4 @@ int sys_symlinkat(struct proc *, void *,
> int sys_unlinkat(struct proc *, void *, register_t *);
> int sys___set_tcb(struct proc *, void *, register_t *);
> int sys___get_tcb(struct proc *, void *, register_t *);
> +int sys_futex(struct proc *, void *, register_t *);
> Index: lib/librthread/Makefile
> ===================================================================
> RCS file: /cvs/src/lib/librthread/Makefile,v
> retrieving revision 1.43
> diff -u -p -r1.43 Makefile
> --- lib/librthread/Makefile 1 Jun 2016 04:34:18 -0000 1.43
> +++ lib/librthread/Makefile 4 Sep 2016 13:12:40 -0000
> @@ -18,7 +18,8 @@ CFLAGS+=-DNO_PIC
> VERSION_SCRIPT= ${.CURDIR}/Symbols.map
>
> .PATH: ${.CURDIR}/arch/${MACHINE_CPU}
> -SRCS= rthread.c \
> +SRCS= futex.c \
> + rthread.c \
> rthread_attr.c \
> rthread_barrier.c \
> rthread_barrier_attr.c \
> Index: lib/librthread/futex.c
> ===================================================================
> RCS file: lib/librthread/futex.c
> diff -N lib/librthread/futex.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ lib/librthread/futex.c 4 Sep 2016 13:12:40 -0000
> @@ -0,0 +1,58 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2016
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/futex.h>
> +
> +#include <pthread.h>
> +#include "thread_private.h"
> +#include "rthread.h"
> +
> +inline int /* acquire the lock, sleeping in futex() while it is contended; always returns 0 */
> +futex_lock(volatile _atomic_lock_t *val)
> +{
> + int c; /* lock word as last observed: 0 free, 1 held, 2 held and a waiter may be asleep */
> +
> + if ((c = __sync_val_compare_and_swap(val, 0, 1)) != 0) { /* uncontended 0 -> 1 attempt failed */
> + do {
> + if (c == 2 || __sync_val_compare_and_swap(val, 1, 2) != 0) { /* word is 2 (or was pushed to 2): wait */
> + futex(val, FUTEX_WAIT, 2, NULL, NULL, 0); /* kernel sleeps only if the word still reads 2 */
> + }
> + } while ((c = __sync_val_compare_and_swap(val, 0, 2)) != 0); /* retake as 2 so the unlocker issues a wake */
> + }
> +
> + return 0;
> +}
> +
> +inline int /* one non-blocking attempt: returns 0 if the lock was taken, 1 otherwise */
> +futex_trylock(volatile _atomic_lock_t *val)
> +{
> + if ((__sync_val_compare_and_swap(val, 0, 1)) != 0) /* previous value non-zero: the swap did not happen */
> + return 1;
> +
> + return 0;
> +}
> +
> +inline int /* release the lock, issuing a FUTEX_WAKE when the word was not simply 1; always returns 0 */
> +futex_unlock(volatile _atomic_lock_t *val)
> +{
> + if (__sync_sub_and_fetch(val, 1) != 0) { /* non-zero after the decrement, e.g. the word was 2 */
> + *val = 0; /* mark the lock free before waking */
> + futex(val, FUTEX_WAKE, 1, NULL, NULL, 0); /* ask the kernel to wake at most one waiter */
> + }
> +
> + return 0;
> +}
> Index: lib/librthread/rthread.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread.c,v
> retrieving revision 1.94
> diff -u -p -r1.94 rthread.c
> --- lib/librthread/rthread.c 4 Sep 2016 10:13:35 -0000 1.94
> +++ lib/librthread/rthread.c 4 Sep 2016 13:12:40 -0000
> @@ -89,20 +89,19 @@ struct pthread_attr _rthread_attr_defaul
> void
> _spinlock(volatile _atomic_lock_t *lock)
> {
> - while (_atomic_lock(lock))
> - sched_yield();
> + futex_lock(lock);
> }
>
> int
> _spinlocktry(volatile _atomic_lock_t *lock)
> {
> - return 0 == _atomic_lock(lock);
> + return 0 == futex_trylock(lock);
> }
>
> void
> _spinunlock(volatile _atomic_lock_t *lock)
> {
> - *lock = _ATOMIC_LOCK_UNLOCKED;
> + futex_unlock(lock);
> }
>
> static void
> Index: lib/librthread/rthread.h
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread.h,v
> retrieving revision 1.60
> diff -u -p -r1.60 rthread.h
> --- lib/librthread/rthread.h 4 Sep 2016 10:13:35 -0000 1.60
> +++ lib/librthread/rthread.h 4 Sep 2016 13:12:40 -0000
> @@ -215,3 +215,8 @@ int __thrsleep(const volatile void *, cl
> volatile void *, const int *);
> int __thrwakeup(const volatile void *, int n);
> int __thrsigdivert(sigset_t, siginfo_t *, const struct timespec *);
> +
> +/* futex.c */
> +int futex_lock(volatile _atomic_lock_t *);
> +int futex_trylock(volatile _atomic_lock_t *);
> +int futex_unlock(volatile _atomic_lock_t *);
> Index: lib/libc/Symbols.list
> ===================================================================
> RCS file: /cvs/src/lib/libc/Symbols.list,v
> retrieving revision 1.50
> diff -u -p -r1.50 Symbols.list
> --- lib/libc/Symbols.list 3 Sep 2016 16:25:03 -0000 1.50
> +++ lib/libc/Symbols.list 4 Sep 2016 13:12:40 -0000
> @@ -272,6 +272,7 @@ fstatat
> fstatfs
> fsync
> ftruncate
> +futex
> futimens
> futimes
> getdents
> Index: lib/libc/sys/Makefile.inc
> ===================================================================
> RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
> retrieving revision 1.146
> diff -u -p -r1.146 Makefile.inc
> --- lib/libc/sys/Makefile.inc 4 Jul 2016 18:01:44 -0000 1.146
> +++ lib/libc/sys/Makefile.inc 4 Sep 2016 13:12:41 -0000
> @@ -87,6 +87,7 @@ DASM= ${ASM:.o=.do}
> # syscalls that CANNOT FAIL. They can return whatever value they want,
> # they just never want to set errno.
> ASM_NOERR=__get_tcb.o __set_tcb.o __threxit.o __thrsleep.o __thrwakeup.o \
> + futex.o \
> getdtablecount.o getegid.o geteuid.o getgid.o getlogin_r.o \
> getpgrp.o getpid.o getppid.o getrtable.o getthrid.o getuid.o \
> issetugid.o \
>
>
> --
> Michal Mazurek
>
>