Resending the futex syscall diff, in a version that works with the current tree. Comments?
Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.627 diff -u -p -r1.627 files --- sys/conf/files 4 Sep 2016 09:22:28 -0000 1.627 +++ sys/conf/files 4 Sep 2016 13:12:39 -0000 @@ -661,6 +661,7 @@ file kern/kern_event.c file kern/kern_exec.c file kern/kern_exit.c file kern/kern_fork.c +file kern/kern_futex.c file kern/kern_kthread.c file kern/kern_ktrace.c ktrace file kern/kern_lock.c Index: sys/kern/init_sysent.c =================================================================== RCS file: /cvs/src/sys/kern/init_sysent.c,v retrieving revision 1.184 diff -u -p -r1.184 init_sysent.c --- sys/kern/init_sysent.c 27 Jun 2016 16:52:01 -0000 1.184 +++ sys/kern/init_sysent.c 4 Sep 2016 13:12:40 -0000 @@ -751,5 +751,7 @@ struct sysent sysent[] = { sys___set_tcb }, /* 329 = __set_tcb */ { 0, 0, SY_NOLOCK | 0, sys___get_tcb }, /* 330 = __get_tcb */ + { 6, s(struct sys_futex_args), 0, + sys_futex }, /* 331 = futex */ }; Index: sys/kern/kern_futex.c =================================================================== RCS file: sys/kern/kern_futex.c diff -N sys/kern/kern_futex.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/kern/kern_futex.c 4 Sep 2016 13:12:40 -0000 @@ -0,0 +1,148 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2016 + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <sys/signal.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/syscallargs.h> +#include <sys/proc.h> +#include <sys/futex.h> +#include <sys/param.h> +#include <sys/ktrace.h> +#include <sys/kernel.h> + +int +futex_wait(struct proc *p, struct sys_futex_args *v) +{ + struct sys_futex_args /* { + syscallarg(void *) uaddr; + syscallarg(int) futex_op; + syscallarg(int) val; + syscallarg(const struct timespec *) tp; + syscallarg(int *) uaddr2; + syscallarg(int) val3; + } */ *uap = v; + void *uaddr = SCARG(uap, uaddr); + int val = SCARG(uap, val); + int futex_op = SCARG(uap, futex_op); + uint64_t to_ticks = 0; + int uval; + int error; + + if ((error = copyin(uaddr, &uval, sizeof uval))) + return error; + if (val != uval) + return EWOULDBLOCK; + if (SCARG(uap, tp) != NULL) { + struct timespec now; + struct timespec tsp; + int clock_id; + + if ((error = copyin(SCARG(uap, tp), &tsp, sizeof(tsp)))) + return error; + + clock_id = futex_op & FUTEX_CLOCK_REALTIME ? 
+ CLOCK_REALTIME : CLOCK_MONOTONIC; + + clock_id = CLOCK_MONOTONIC; + + if ((error = clock_gettime(p, clock_id, &now))) + return error; + +#ifdef KTRACE + if (KTRPOINT(p, KTR_STRUCT)) + ktrabstimespec(p, &tsp); +#endif + + if (timespeccmp(&tsp, &now, <)) + return EWOULDBLOCK; + + timespecsub(&tsp, &now, &tsp); + to_ticks = (uint64_t)hz * tsp.tv_sec + + (tsp.tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1; + if (to_ticks > INT_MAX) + to_ticks = INT_MAX; + } + + p->p_futslpid = (long)uaddr; + error = tsleep(&p->p_futslpid, PUSER | PCATCH, "futex", (int)to_ticks); + + if (error == ERESTART) + error = EINTR; + + return error; +} + +int +futex_wake(struct proc *p, struct sys_futex_args *v) +{ + struct sys_futex_args /* { + syscallarg(void *) uaddr; + syscallarg(int) futex_op; + syscallarg(int) val; + syscallarg(const struct timespec *) tp; + syscallarg(int *) uaddr2; + syscallarg(int) val3; + } */ *uap = v; + void *uaddr = SCARG(uap, uaddr); + int n = SCARG(uap, val); + int found = 0; + struct proc *q; + + TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) { + if (q->p_futslpid == (long)uaddr) { + wakeup_one(&q->p_futslpid); + q->p_futslpid = 0; + if (++found == n) + break; + } + } + + return found ? 
0 : ESRCH; +} + +int +sys_futex(struct proc *p, void *v, register_t *retval) +{ + struct sys_futex_args /* { + syscallarg(void *) uaddr; + syscallarg(int) futex_op; + syscallarg(int) val; + syscallarg(const struct timespec *) tp; + syscallarg(int *) uaddr2; + syscallarg(int) val3; + } */ *uap = v; + int op = SCARG(uap, futex_op); + + *retval = 0; + + switch (op) { + case FUTEX_WAIT: + *retval = futex_wait(p, uap); + break; + case FUTEX_WAKE: + *retval = futex_wake(p, uap); + break; + default: + *retval = EINVAL; + break; + } + return 0; +} Index: sys/kern/kern_pledge.c =================================================================== RCS file: /cvs/src/sys/kern/kern_pledge.c,v retrieving revision 1.181 diff -u -p -r1.181 kern_pledge.c --- sys/kern/kern_pledge.c 31 Aug 2016 07:22:43 -0000 1.181 +++ sys/kern/kern_pledge.c 4 Sep 2016 13:12:40 -0000 @@ -260,6 +260,7 @@ const uint64_t pledge_syscalls[SYS_MAXSY [SYS___tfork] = PLEDGE_STDIO, [SYS_sched_yield] = PLEDGE_STDIO, [SYS___thrsleep] = PLEDGE_STDIO, + [SYS_futex] = PLEDGE_ALWAYS, [SYS___thrwakeup] = PLEDGE_STDIO, [SYS___threxit] = PLEDGE_STDIO, [SYS___thrsigdivert] = PLEDGE_STDIO, Index: sys/kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.134 diff -u -p -r1.134 kern_synch.c --- sys/kern/kern_synch.c 3 Sep 2016 15:06:06 -0000 1.134 +++ sys/kern/kern_synch.c 4 Sep 2016 13:12:40 -0000 @@ -59,7 +59,7 @@ #endif int thrsleep(struct proc *, struct sys___thrsleep_args *); -int thrsleep_unlock(void *); +int thrsleep_unlock(struct proc *, void *); /* * We're only looking at 7 bits of the address; everything is @@ -454,15 +454,27 @@ sys_sched_yield(struct proc *p, void *v, } int -thrsleep_unlock(void *lock) +thrsleep_unlock(struct proc *p, void *lock) { static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED; _atomic_lock_t *atomiclock = lock; + struct proc *q; + int error; if (!lock) return 0; - return copyout(&unlocked, 
atomiclock, sizeof(unlocked)); + error = copyout(&unlocked, atomiclock, sizeof(unlocked)); + + TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) { + if (q->p_futslpid == (long)lock) { + wakeup_one(&q->p_futslpid); + q->p_futslpid = 0; + break; + } + } + + return error; } static int globalsleepaddr; @@ -498,7 +510,7 @@ thrsleep(struct proc *p, struct sys___th if (timespeccmp(tsp, &now, <)) { /* already passed: still do the unlock */ - if ((error = thrsleep_unlock(lock))) + if ((error = thrsleep_unlock(p, lock))) return (error); return (EWOULDBLOCK); } @@ -512,7 +524,7 @@ thrsleep(struct proc *p, struct sys___th p->p_thrslpid = ident; - if ((error = thrsleep_unlock(lock))) + if ((error = thrsleep_unlock(p, lock))) goto out; if (SCARG(uap, abort) != NULL) { Index: sys/kern/syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/syscalls.c,v retrieving revision 1.183 diff -u -p -r1.183 syscalls.c --- sys/kern/syscalls.c 27 Jun 2016 16:52:01 -0000 1.183 +++ sys/kern/syscalls.c 4 Sep 2016 13:12:40 -0000 @@ -393,4 +393,5 @@ char *syscallnames[] = { "#328 (obsolete __tfork51)", /* 328 = obsolete __tfork51 */ "__set_tcb", /* 329 = __set_tcb */ "__get_tcb", /* 330 = __get_tcb */ + "futex", /* 331 = futex */ }; Index: sys/kern/syscalls.master =================================================================== RCS file: /cvs/src/sys/kern/syscalls.master,v retrieving revision 1.173 diff -u -p -r1.173 syscalls.master --- sys/kern/syscalls.master 27 Jun 2016 16:50:07 -0000 1.173 +++ sys/kern/syscalls.master 4 Sep 2016 13:12:40 -0000 @@ -563,3 +563,6 @@ 328 OBSOL __tfork51 329 STD NOLOCK { void sys___set_tcb(void *tcb); } 330 STD NOLOCK { void *sys___get_tcb(void); } +331 STD { void sys_futex(void *uaddr, int futex_op, int val, \ + const struct timespec *tp, int *uaddr2, \ + int val3); } Index: sys/sys/futex.h =================================================================== RCS file: sys/sys/futex.h diff -N sys/sys/futex.h --- 
/dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/futex.h 4 Sep 2016 13:12:40 -0000 @@ -0,0 +1,32 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2016 + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_FUTEX_H_ +#define _SYS_FUTEX_H_ + +#include <sys/types.h> + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 + +#define FUTEX_CLOCK_REALTIME 256 + +__BEGIN_DECLS +int futex(volatile void *, int, int, const struct timespec *, int *, int); +__END_DECLS + +#endif /* !_SYS_FUTEX_H_ */ Index: sys/sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.226 diff -u -p -r1.226 proc.h --- sys/sys/proc.h 3 Sep 2016 08:47:24 -0000 1.226 +++ sys/sys/proc.h 4 Sep 2016 13:12:40 -0000 @@ -302,6 +302,7 @@ struct proc { int p_dupfd; /* Sideways return value from filedescopen. XXX */ long p_thrslpid; /* for thrsleep syscall */ + long p_futslpid; /* for futex syscall */ /* scheduling */ u_int p_estcpu; /* Time averaged value of p_cpticks. 
*/ Index: sys/sys/syscall.h =================================================================== RCS file: /cvs/src/sys/sys/syscall.h,v retrieving revision 1.182 diff -u -p -r1.182 syscall.h --- sys/sys/syscall.h 27 Jun 2016 16:52:01 -0000 1.182 +++ sys/sys/syscall.h 4 Sep 2016 13:12:40 -0000 @@ -702,4 +702,7 @@ /* syscall: "__get_tcb" ret: "void *" args: */ #define SYS___get_tcb 330 -#define SYS_MAXSYSCALL 331 +/* syscall: "futex" ret: "void" args: "void *" "int" "int" "const struct timespec *" "int *" "int" */ +#define SYS_futex 331 + +#define SYS_MAXSYSCALL 332 Index: sys/sys/syscallargs.h =================================================================== RCS file: /cvs/src/sys/sys/syscallargs.h,v retrieving revision 1.185 diff -u -p -r1.185 syscallargs.h --- sys/sys/syscallargs.h 27 Jun 2016 16:52:01 -0000 1.185 +++ sys/sys/syscallargs.h 4 Sep 2016 13:12:40 -0000 @@ -1098,6 +1098,15 @@ struct sys___set_tcb_args { syscallarg(void *) tcb; }; +struct sys_futex_args { + syscallarg(void *) uaddr; + syscallarg(int) futex_op; + syscallarg(int) val; + syscallarg(const struct timespec *) tp; + syscallarg(int *) uaddr2; + syscallarg(int) val3; +}; + /* * System call prototypes. 
*/ @@ -1347,3 +1356,4 @@ int sys_symlinkat(struct proc *, void *, int sys_unlinkat(struct proc *, void *, register_t *); int sys___set_tcb(struct proc *, void *, register_t *); int sys___get_tcb(struct proc *, void *, register_t *); +int sys_futex(struct proc *, void *, register_t *); Index: lib/librthread/Makefile =================================================================== RCS file: /cvs/src/lib/librthread/Makefile,v retrieving revision 1.43 diff -u -p -r1.43 Makefile --- lib/librthread/Makefile 1 Jun 2016 04:34:18 -0000 1.43 +++ lib/librthread/Makefile 4 Sep 2016 13:12:40 -0000 @@ -18,7 +18,8 @@ CFLAGS+=-DNO_PIC VERSION_SCRIPT= ${.CURDIR}/Symbols.map .PATH: ${.CURDIR}/arch/${MACHINE_CPU} -SRCS= rthread.c \ +SRCS= futex.c \ + rthread.c \ rthread_attr.c \ rthread_barrier.c \ rthread_barrier_attr.c \ Index: lib/librthread/futex.c =================================================================== RCS file: lib/librthread/futex.c diff -N lib/librthread/futex.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ lib/librthread/futex.c 4 Sep 2016 13:12:40 -0000 @@ -0,0 +1,58 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2016 + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/futex.h> + +#include <pthread.h> +#include "thread_private.h" +#include "rthread.h" + +inline int +futex_lock(volatile _atomic_lock_t *val) +{ + int c; + + if ((c = __sync_val_compare_and_swap(val, 0, 1)) != 0) { + do { + if (c == 2 || __sync_val_compare_and_swap(val, 1, 2) != 0) { + futex(val, FUTEX_WAIT, 2, NULL, NULL, 0); + } + } while ((c = __sync_val_compare_and_swap(val, 0, 2)) != 0); + } + + return 0; +} + +inline int +futex_trylock(volatile _atomic_lock_t *val) +{ + if ((__sync_val_compare_and_swap(val, 0, 1)) != 0) + return 1; + + return 0; +} + +inline int +futex_unlock(volatile _atomic_lock_t *val) +{ + if (__sync_sub_and_fetch(val, 1) != 0) { + *val = 0; + futex(val, FUTEX_WAKE, 1, NULL, NULL, 0); + } + + return 0; +} Index: lib/librthread/rthread.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread.c,v retrieving revision 1.94 diff -u -p -r1.94 rthread.c --- lib/librthread/rthread.c 4 Sep 2016 10:13:35 -0000 1.94 +++ lib/librthread/rthread.c 4 Sep 2016 13:12:40 -0000 @@ -89,20 +89,19 @@ struct pthread_attr _rthread_attr_defaul void _spinlock(volatile _atomic_lock_t *lock) { - while (_atomic_lock(lock)) - sched_yield(); + futex_lock(lock); } int _spinlocktry(volatile _atomic_lock_t *lock) { - return 0 == _atomic_lock(lock); + return 0 == futex_trylock(lock); } void _spinunlock(volatile _atomic_lock_t *lock) { - *lock = _ATOMIC_LOCK_UNLOCKED; + futex_unlock(lock); } static void Index: lib/librthread/rthread.h =================================================================== RCS file: /cvs/src/lib/librthread/rthread.h,v retrieving revision 1.60 diff -u -p -r1.60 rthread.h --- lib/librthread/rthread.h 4 Sep 2016 10:13:35 -0000 1.60 +++ lib/librthread/rthread.h 4 Sep 2016 13:12:40 -0000 @@ -215,3 +215,8 @@ int __thrsleep(const volatile void *, cl volatile void *, const int *); int __thrwakeup(const volatile void *, int n); int __thrsigdivert(sigset_t, siginfo_t *, const 
struct timespec *); + +/* futex.c */ +int futex_lock(volatile _atomic_lock_t *); +int futex_trylock(volatile _atomic_lock_t *); +int futex_unlock(volatile _atomic_lock_t *); Index: lib/libc/Symbols.list =================================================================== RCS file: /cvs/src/lib/libc/Symbols.list,v retrieving revision 1.50 diff -u -p -r1.50 Symbols.list --- lib/libc/Symbols.list 3 Sep 2016 16:25:03 -0000 1.50 +++ lib/libc/Symbols.list 4 Sep 2016 13:12:40 -0000 @@ -272,6 +272,7 @@ fstatat fstatfs fsync ftruncate +futex futimens futimes getdents Index: lib/libc/sys/Makefile.inc =================================================================== RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v retrieving revision 1.146 diff -u -p -r1.146 Makefile.inc --- lib/libc/sys/Makefile.inc 4 Jul 2016 18:01:44 -0000 1.146 +++ lib/libc/sys/Makefile.inc 4 Sep 2016 13:12:41 -0000 @@ -87,6 +87,7 @@ DASM= ${ASM:.o=.do} # syscalls that CANNOT FAIL. They can return whatever value they want, # they just never want to set errno. ASM_NOERR=__get_tcb.o __set_tcb.o __threxit.o __thrsleep.o __thrwakeup.o \ + futex.o \ getdtablecount.o getegid.o geteuid.o getgid.o getlogin_r.o \ getpgrp.o getpid.o getppid.o getrtable.o getthrid.o getuid.o \ issetugid.o \ -- Michal Mazurek