Mostly for people asking for my secret diffs. This is the CAS (compare-and-swap) diff:
Index: i386/locore.s
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/locore.s,v
retrieving revision 1.137
diff -u -p -r1.137 locore.s
--- i386/locore.s	5 Jul 2011 00:30:10 -0000	1.137
+++ i386/locore.s	8 Jul 2011 20:57:25 -0000
@@ -1695,6 +1695,48 @@ ENTRY(acpi_release_global_lock)
 	ret
 #endif
 
+/*
+ * int ucas_32(volatile int32_t *uptr, int32_t old, int32_t new);
+ *
+ * Atomically store new into the user word at uptr if it still holds old.
+ * Returns 0 when the word was exchanged and 1 when it held another value
+ * (nothing is stored in that case).  A fault is handed to copy_fault,
+ * which is not part of this diff and is expected to unwind the frame
+ * built below and return EFAULT.
+ */
+ENTRY(ucas_32)
+#ifdef DDB
+	pushl	%ebp
+	movl	%esp,%ebp
+#endif
+	pushl	%esi
+	pushl	%edi
+	pushl	$0			/* popped into PCB_ONFAULT on exit */
+
+	movl	16+FPADD(%esp),%esi	/* uptr */
+	movl	20+FPADD(%esp),%eax	/* old, cmpxchg compares with %eax */
+	movl	24+FPADD(%esp),%edi	/* new */
+
+	cmpl	$VM_MAXUSER_ADDRESS-4,%esi
+	ja	_C_LABEL(copy_fault)
+
+	GET_CURPCB(%edx)
+	movl	$_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
+
+	lock
+	cmpxchgl %edi,(%esi)
+	setne	%al			/* ZF clear: *uptr differed from old */
+	movzbl	%al,%eax
+
+	popl	PCB_ONFAULT(%edx)
+	popl	%edi
+	popl	%esi
+#ifdef DDB
+	leave
+#endif
+	ret
+
+
 #if NLAPIC > 0
 #include <i386/i386/apicvec.s>
 #endif
Index: include/atomic.h
===================================================================
RCS file: /cvs/src/sys/arch/i386/include/atomic.h,v
retrieving revision 1.8
diff -u -p -r1.8 atomic.h
--- include/atomic.h	23 Mar 2011 16:54:35 -0000	1.8
+++ include/atomic.h	8 Jul 2011 20:57:25 -0000
@@ -99,6 +99,9 @@ i486_atomic_cas_int(volatile u_int *ptr,
 	return (res);
 }
 
+int ucas_32(volatile int32_t *, int32_t, int32_t);
+#define atomic_ucas_32 ucas_32
+
 #define atomic_setbits_int i386_atomic_setbits_l
 #define atomic_clearbits_int i386_atomic_clearbits_l
 

Which is needed by this futex diff:

Index: files.linux
===================================================================
RCS file: /cvs/src/sys/compat/linux/files.linux,v
retrieving revision 1.15
diff -u -p -r1.15 files.linux
--- files.linux	10 Feb 2011 11:58:43 -0000	1.15
+++ files.linux	8 Jul 2011 21:02:07 -0000
@@ -27,4 +27,5 @@ file	compat/linux/linux_syscalls.c	comp
 file	compat/linux/linux_sysent.c	compat_linux
 file	compat/linux/linux_termios.c	compat_linux
 file	compat/linux/linux_time.c	
compat_linux
+file	compat/linux/linux_futex.c	compat_linux
 file	compat/linux/linux_dummy.c	compat_linux
Index: linux_emuldata.h
===================================================================
RCS file: /cvs/src/sys/compat/linux/linux_emuldata.h,v
retrieving revision 1.6
diff -u -p -r1.6 linux_emuldata.h
--- linux_emuldata.h	5 Apr 2011 22:54:30 -0000	1.6
+++ linux_emuldata.h	8 Jul 2011 21:02:07 -0000
@@ -48,5 +48,8 @@ struct linux_emuldata {
 	void	*my_set_tid;
 	void	*my_clear_tid;
 	unsigned my_tls_base;
+
+	struct linux_robust_list_head *led_robust_head;
+
 };
 #endif /* !_LINUX_EMULDATA_H_ */
Index: linux_exec.c
===================================================================
RCS file: /cvs/src/sys/compat/linux/linux_exec.c,v
retrieving revision 1.34
diff -u -p -r1.34 linux_exec.c
--- linux_exec.c	20 Apr 2011 19:14:34 -0000	1.34
+++ linux_exec.c	8 Jul 2011 21:02:07 -0000
@@ -77,6 +77,8 @@ extern struct sysent linux_sysent[];
 extern char *linux_syscallnames[];
 #endif
 
+extern void futex_pool_init(void);
+
 int exec_linux_aout_prep_zmagic(struct proc *, struct exec_package *);
 int exec_linux_aout_prep_nmagic(struct proc *, struct exec_package *);
 int exec_linux_aout_prep_omagic(struct proc *, struct exec_package *);
@@ -510,6 +512,10 @@ recognized:
 	*pos = ELF32_NO_ADDR;
 	if (*os == OOS_NULL)
 		*os = OOS_LINUX;
+
+	/* Sets up futex_lock and the futex pools. */
+	futex_pool_init();
+
 	return (0);
 }
 
Index: linux_futex.c
===================================================================
RCS file: linux_futex.c
diff -N 
linux_futex.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ linux_futex.c	8 Jul 2011 21:02:07 -0000
@@ -0,0 +1,773 @@
+/*	$OpenBSD$	*/
+/*	$NetBSD: linux_futex.c,v 1.26 2010/07/07 01:30:35 chs Exp $	*/
+
+/*-
+ * Copyright (c) 2011 Paul Irofti <piro...@openbsd.org>
+ * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Emmanuel Dreyfus
+ * 4. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/ucred.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stdint.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/pool.h>
+#include <sys/kernel.h>
+
+#include <sys/syscallargs.h>
+
+#include <compat/linux/linux_types.h>
+#include <compat/linux/linux_fcntl.h>
+#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
+#include <compat/linux/linux_sched.h>
+#include <compat/linux/linux_signal.h>
+#include <compat/linux/linux_syscallargs.h>
+#include <compat/linux/linux_util.h>
+#include <compat/linux/linux_dirent.h>
+#include <compat/linux/linux_emuldata.h>
+
+#include <compat/linux/linux_time.h>
+#include <compat/linux/linux_futex.h>
+
+/* XXX tracing is unconditionally enabled while this diff is in development. */
+#define COMPATFUTEX_DEBUG
+#ifdef COMPATFUTEX_DEBUG
+#define DPRINTF(x) printf x
+#else
+#define DPRINTF(x)
+#endif
+
+/*
+ * Upper bound on compare-and-swap attempts in futex_atomic_op(), so a word
+ * that can be read but never updated cannot keep the kernel spinning.
+ */
+#define FUTEX_UCAS_RETRIES	128
+
+struct pool futex_pool;
+struct pool futex_wp_pool;
+
+struct futex;
+
+/* One sleeping thread; every field is protected by futex_lock. */
+struct waiting_proc {
+	struct proc *p;
+	struct futex *wp_futex;		/* futex whose wait queue holds us */
+	struct futex *wp_new_futex;	/* requeue target set by futex_wake() */
+	TAILQ_ENTRY(waiting_proc) wp_list;	/* f_waiting_proc linkage */
+	TAILQ_ENTRY(waiting_proc) wp_rqlist;	/* f_requeue_proc linkage */
+};
+
+/*
+ * Kernel state for one futex word.
+ * XXX futexes are keyed by user virtual address only, so unrelated
+ * processes that use the same address share one struct futex.
+ */
+struct futex {
+	void *f_uaddr;
+	int f_refcount;
+	LIST_ENTRY(futex) f_list;
+	TAILQ_HEAD(, waiting_proc) f_waiting_proc;	/* asleep on us */
+	TAILQ_HEAD(, waiting_proc) f_requeue_proc;	/* in transit to us */
+};
+
+static LIST_HEAD(futex_list, futex) futex_list;
+
+/* Protects futex_list and the queues and counters hanging off it. */
+struct mutex futex_lock;
+void futex_pool_init(void);
+
+int linux_do_futex(struct proc *, const struct linux_sys_futex_args *,
+    register_t *, struct timespec *);
+
+struct futex * futex_get(void *);
+void futex_ref(struct futex *);
+void futex_put(struct futex *);
+static void futex_put_locked(struct futex *);
+
+int futex_sleep(struct futex **, struct proc *, int, struct waiting_proc *);
+int futex_wake(struct futex *, int, struct futex *, int);
+int futex_atomic_op(struct proc *, int, void *);
+
+int itimespecfix(struct timespec *ts);
+int tstohz(const struct timespec *);
+
+/*
+ * futex(2) entry point.  Fetches the optional FUTEX_WAIT timeout from user
+ * space and hands the request to linux_do_futex().
+ */
+int
+linux_sys_futex(struct proc *p, void *v, register_t *retval)
+{
+	struct linux_sys_futex_args /* {
+		syscallarg(int *) uaddr;
+		syscallarg(int) op;
+		syscallarg(int) val;
+		syscallarg(const struct linux_timespec *) timeout;
+		syscallarg(int *) uaddr2;
+		syscallarg(int) val3;
+	} */ *uap = v;
+
+	struct l_timespec lts;
+	struct timespec ts = {0, 0};
+	int error;
+
+	if ((SCARG(uap, op) & ~LINUX_FUTEX_PRIVATE_FLAG) == LINUX_FUTEX_WAIT &&
+	    SCARG(uap, timeout) != NULL) {
+		if ((error = copyin(SCARG(uap, timeout),
+		    &lts, sizeof(lts))) != 0) {
+			return error;
+		}
+		linux_to_native_timespec(&ts, &lts);
+	}
+
+	return linux_do_futex(p, uap, retval, &ts);
+}
+
+/*
+ * Carry out one futex operation.  ts is the already copied-in timeout and
+ * is only looked at for FUTEX_WAIT.  Returns 0 or an errno; operations that
+ * wake threads report how many through *retval.
+ */
+int
+linux_do_futex(struct proc *p, const struct linux_sys_futex_args *uap,
+    register_t *retval, struct timespec *ts)
+{
+	/* {
+		syscallarg(int *) uaddr;
+		syscallarg(int) op;
+		syscallarg(int) val;
+		syscallarg(const struct linux_timespec *) timeout;
+		syscallarg(int *) uaddr2;
+		syscallarg(int) val3;
+	} */
+	int val;
+	int ret;
+	int error = 0;
+	struct futex *f;
+	struct futex *newf;
+	int timeout_hz;
+	struct futex *f2;
+	struct waiting_proc *wp;
+	int op_ret;
+
+	/*
+	 * Our implementation provides only private futexes. Most of the apps
+	 * should use private futexes but don't claim so. Therefore we treat
+	 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
+	 * in most cases (ie. when futexes are not shared on file descriptor
+	 * or between different processes).
+	 */
+	switch (SCARG(uap, op) & ~LINUX_FUTEX_PRIVATE_FLAG) {
+	case LINUX_FUTEX_WAIT:
+		if ((error = copyin(SCARG(uap, uaddr),
+		    &val, sizeof(val))) != 0) {
+			return error;
+		}
+
+		if (val != SCARG(uap, val)) {
+			return EWOULDBLOCK;
+		}
+
+		DPRINTF(("FUTEX_WAIT %d: val = %d, uaddr = %p, "
+		    "*uaddr = %d, timeout = %lld.%09ld\n",
+		    p->p_pid, SCARG(uap, val),
+		    SCARG(uap, uaddr), val, (long long)ts->tv_sec,
+		    ts->tv_nsec));
+
+		if ((error = itimespecfix(ts)) != 0) {
+			return error;
+		}
+		timeout_hz = tstohz(ts);
+
+		/*
+		 * If the user process requests a non null timeout,
+		 * make sure we do not turn it into an infinite
+		 * timeout because timeout_hz is 0.
+		 *
+		 * We use a minimal timeout of 1/hz. Maybe it would make
+		 * sense to just return ETIMEDOUT without sleeping.
+		 */
+		if (SCARG(uap, timeout) != NULL && timeout_hz == 0)
+			timeout_hz = 1;
+
+		/*
+		 * XXX a FUTEX_WAKE that runs between the value check above
+		 * and futex_sleep() queueing us is missed.
+		 */
+		wp = pool_get(&futex_wp_pool, PR_WAITOK);
+		f = futex_get(SCARG(uap, uaddr));
+		ret = futex_sleep(&f, p, timeout_hz, wp);
+		futex_put(f);
+		pool_put(&futex_wp_pool, wp);
+
+		DPRINTF(("FUTEX_WAIT %d: uaddr = %p, "
+		    "ret = %d\n", p->p_pid,
+		    SCARG(uap, uaddr), ret));
+
+		switch (ret) {
+		case EWOULDBLOCK:	/* timeout */
+			return ETIMEDOUT;
+		case EINTR:		/* signal */
+			return EINTR;
+		case 0:			/* FUTEX_WAKE received */
+			DPRINTF(("FUTEX_WAIT %d: uaddr = %p, got it\n",
+			    p->p_pid, SCARG(uap, uaddr)));
+			return 0;
+		default:
+			DPRINTF(("FUTEX_WAIT: unexpected ret = %d\n", ret));
+			/* Report it instead of pretending we were woken. */
+			return ret;
+		}
+
+		/* NOTREACHED */
+		break;
+
+	case LINUX_FUTEX_WAKE:
+		/*
+		 * XXX: Linux is able cope with different addresses
+		 * corresponding to the same mapped memory in the sleeping
+		 * and the waker process(es).
+		 */
+		DPRINTF(("FUTEX_WAKE %d: uaddr = %p, val = %d\n",
+		    p->p_pid,
+		    SCARG(uap, uaddr), SCARG(uap, val)));
+
+		f = futex_get(SCARG(uap, uaddr));
+		*retval = futex_wake(f, SCARG(uap, val), NULL, 0);
+		futex_put(f);
+
+		break;
+
+	case LINUX_FUTEX_CMP_REQUEUE:
+		/* futex_wake() cannot requeue a futex onto itself. */
+		if (SCARG(uap, uaddr) == SCARG(uap, uaddr2))
+			return EINVAL;
+
+		if ((error = copyin(SCARG(uap, uaddr),
+		    &val, sizeof(val))) != 0) {
+			return error;
+		}
+
+		if (val != SCARG(uap, val3)) {
+			return EAGAIN;
+		}
+
+		DPRINTF(("FUTEX_CMP_REQUEUE %d: uaddr = %p, val = %d, "
+		    "uaddr2 = %p, val2 = %d\n",
+		    p->p_pid,
+		    SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
+		    (int)(unsigned long)SCARG(uap, timeout)));
+
+		f = futex_get(SCARG(uap, uaddr));
+		newf = futex_get(SCARG(uap, uaddr2));
+		*retval = futex_wake(f, SCARG(uap, val), newf,
+		    (int)(unsigned long)SCARG(uap, timeout));
+		futex_put(f);
+		futex_put(newf);
+
+		break;
+
+	case LINUX_FUTEX_REQUEUE:
+		/* futex_wake() cannot requeue a futex onto itself. */
+		if (SCARG(uap, uaddr) == SCARG(uap, uaddr2))
+			return EINVAL;
+
+		DPRINTF(("FUTEX_REQUEUE %d: uaddr = %p, val = %d, "
+		    "uaddr2 = %p, val2 = %d\n",
+		    p->p_pid,
+		    SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2),
+		    (int)(unsigned long)SCARG(uap, timeout)));
+
+		f = futex_get(SCARG(uap, uaddr));
+		newf = futex_get(SCARG(uap, uaddr2));
+		*retval = futex_wake(f, SCARG(uap, val), newf,
+		    (int)(unsigned long)SCARG(uap, timeout));
+		futex_put(f);
+		futex_put(newf);
+
+		break;
+
+	case LINUX_FUTEX_FD:
+		DPRINTF(("linux_sys_futex: unimplemented op %d\n",
+		    SCARG(uap, op)));
+		return ENOSYS;
+	case LINUX_FUTEX_WAKE_OP:
+		DPRINTF(("FUTEX_WAKE_OP %d: uaddr = %p, op = %d, "
+		    "val = %d, uaddr2 = %p, val2 = %d\n",
+		    p->p_pid,
+		    SCARG(uap, uaddr), SCARG(uap, op), SCARG(uap, val),
+		    SCARG(uap, uaddr2),
+		    (int)(unsigned long)SCARG(uap, timeout)));
+
+		f = futex_get(SCARG(uap, uaddr));
+		f2 = futex_get(SCARG(uap, uaddr2));
+
+		/*
+		 * This function returns positive number as results and
+		 * negative as errors
+		 */
+		op_ret = futex_atomic_op(p, SCARG(uap, val3),
+		    SCARG(uap, uaddr2));
+		if (op_ret < 0) {
+			futex_put(f);
+			futex_put(f2);
+			return -op_ret;
+		}
+
+		ret = futex_wake(f, SCARG(uap, val), NULL, 0);
+		futex_put(f);
+		if (op_ret > 0) {
+			op_ret = 0;
+			/*
+			 * Linux abuses the address of the timespec parameter
+			 * as the number of retries
+			 */
+			op_ret += futex_wake(f2,
+			    (int)(unsigned long)SCARG(uap, timeout), NULL, 0);
+			ret += op_ret;
+		}
+		futex_put(f2);
+		*retval = ret;
+		break;
+	default:
+		DPRINTF(("linux_sys_futex: unknown op %d\n",
+		    SCARG(uap, op)));
+		return ENOSYS;
+	}
+	return 0;
+}
+
+/*
+ * Set up futex_lock and the two pools.  The linux_exec.c part of this diff
+ * calls this from the ELF probe path, i.e. more than once, so only the
+ * first call does any work.
+ */
+void
+futex_pool_init(void)
+{
+	static int futex_initialized;
+
+	if (futex_initialized)
+		return;
+	futex_initialized = 1;
+
+	DPRINTF(("Inside futex_pool_init()\n"));
+	mtx_init(&futex_lock, IPL_NONE);
+	pool_init(&futex_pool, sizeof(struct futex), 0, 0, PR_DEBUGCHK,
+	    "futexpl", &pool_allocator_nointr);
+	pool_init(&futex_wp_pool, sizeof(struct waiting_proc), 0, 0,
+	    PR_DEBUGCHK, "futexwppl", &pool_allocator_nointr);
+}
+
+/*
+ * Look up the futex for uaddr, creating it on first use.  The returned
+ * futex carries a reference that the caller drops with futex_put().
+ */
+struct futex *
+futex_get(void *uaddr)
+{
+	struct futex *f, *newf;
+
+	mtx_enter(&futex_lock);
+	LIST_FOREACH(f, &futex_list, f_list) {
+		if (f->f_uaddr == uaddr) {
+			f->f_refcount++;
+			mtx_leave(&futex_lock);
+			return f;
+		}
+	}
+	mtx_leave(&futex_lock);
+
+	/* Not found, create it */
+	newf = pool_get(&futex_pool, PR_WAITOK|PR_ZERO);
+
+	mtx_enter(&futex_lock);
+	/* Did someone else create it in the meantime? */
+	LIST_FOREACH(f, &futex_list, f_list) {
+		if (f->f_uaddr == uaddr) {
+			f->f_refcount++;
+			mtx_leave(&futex_lock);
+			pool_put(&futex_pool, newf);
+			return f;
+		}
+	}
+	newf->f_uaddr = uaddr;
+	newf->f_refcount = 1;
+	TAILQ_INIT(&newf->f_waiting_proc);
+	TAILQ_INIT(&newf->f_requeue_proc);
+	LIST_INSERT_HEAD(&futex_list, newf, f_list);
+	mtx_leave(&futex_lock);
+
+	return newf;
+}
+
+/* Take an additional reference on f. */
+void
+futex_ref(struct futex *f)
+{
+	mtx_enter(&futex_lock);
+	f->f_refcount++;
+	mtx_leave(&futex_lock);
+}
+
+/* Drop one reference; the caller holds futex_lock.  Frees f on the last. */
+static void
+futex_put_locked(struct futex *f)
+{
+	f->f_refcount--;
+	if (f->f_refcount == 0) {
+		KASSERT(TAILQ_EMPTY(&f->f_waiting_proc));
+		KASSERT(TAILQ_EMPTY(&f->f_requeue_proc));
+		LIST_REMOVE(f, f_list);
+		pool_put(&futex_pool, f);
+	}
+}
+
+/* Drop one reference on f, taking futex_lock to do so. */
+void
+futex_put(struct futex *f)
+{
+	mtx_enter(&futex_lock);
+	futex_put_locked(f);
+	mtx_leave(&futex_lock);
+}
+
+/*
+ * Sleep on *fp until futex_wake() picks us, the timeout (in ticks, 0 means
+ * forever) expires or a signal arrives.  A waker may instead move us to
+ * another futex; in that case *fp is updated, the reference on the old
+ * futex is dropped and the reference the waker took on the new one is
+ * kept for the caller to release.  Returns 0 or the msleep() error.
+ */
+int
+futex_sleep(struct futex **fp, struct proc *p, int timeout,
+    struct waiting_proc *wp)
+{
+	struct futex *f, *newf;
+	int ret;
+
+	f = *fp;
+	wp->p = p;
+	wp->wp_new_futex = NULL;
+
+	mtx_enter(&futex_lock);
+	for (;;) {
+		wp->wp_futex = f;
+		TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
+
+		/*
+		 * wp is private to this sleep, so wakeup(wp) reaches exactly
+		 * this thread.  msleep() returns with futex_lock held again.
+		 */
+		ret = msleep(wp, &futex_lock, PUSER | PCATCH, "futex_sleep",
+		    timeout);
+
+		if (wp->wp_futex != NULL) {
+			/* Nobody dequeued us: timeout or signal. */
+			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+			wp->wp_futex = NULL;
+			break;
+		}
+
+		newf = wp->wp_new_futex;
+		if (newf == NULL) {
+			/* A waker counted us; deliver the wake-up. */
+			ret = 0;
+			break;
+		}
+
+		/* futex_wake() asked us to requeue on newf. */
+		wp->wp_new_futex = NULL;
+		TAILQ_REMOVE(&newf->f_requeue_proc, wp, wp_rqlist);
+		if (ret != 0) {
+			/* Interrupted on the way: give the reference back. */
+			futex_put_locked(newf);
+			break;
+		}
+		futex_put_locked(f);
+		*fp = f = newf;
+	}
+	mtx_leave(&futex_lock);
+
+	return ret;
+}
+
+/*
+ * Wake up to n threads waiting on f and, when newf is not NULL, move up to
+ * n2 further waiters over to newf.  Threads that an earlier call aimed at f
+ * but that have not yet requeued themselves are treated as waiters of f.
+ * Returns the number of threads woken.
+ */
+int
+futex_wake(struct futex *f, int n, struct futex *newf, int n2)
+{
+	struct waiting_proc *wp;
+	int woken = 0, requeued = 0;
+
+	KASSERT(newf != f);
+
+	mtx_enter(&futex_lock);
+
+	/*
+	 * first, deal with the threads sleeping on this futex.  each one is
+	 * taken off the queue here so a later call cannot pick it twice.
+	 */
+	while ((wp = TAILQ_FIRST(&f->f_waiting_proc)) != NULL) {
+		KASSERT(wp->wp_new_futex == NULL);
+
+		if (woken < n) {
+			woken++;
+		} else if (newf != NULL && requeued < n2) {
+			/* matching futex_put is done by the other thread. */
+			newf->f_refcount++;
+			wp->wp_new_futex = newf;
+			TAILQ_INSERT_TAIL(&newf->f_requeue_proc, wp, wp_rqlist);
+			requeued++;
+			DPRINTF(("futex_wake: requeue newf %p ref %d\n",
+			    newf, newf->f_refcount));
+		} else {
+			break;
+		}
+
+		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+		wp->wp_futex = NULL;
+		DPRINTF(("futex_wake: signal f %p ref %d\n",
+		    f, f->f_refcount));
+		wakeup(wp);
+	}
+
+	/*
+	 * next, deal with threads that are requeuing to this futex.
+	 * we don't need to signal these threads, any thread on the
+	 * requeue list has already been signaled but hasn't had a chance
+	 * to run and requeue itself yet.  if we would normally wake
+	 * a thread, just remove the requeue info.  if we would normally
+	 * requeue a thread, change the requeue target.
+	 */
+	while ((wp = TAILQ_FIRST(&f->f_requeue_proc)) != NULL) {
+		KASSERT(wp->wp_new_futex == f);
+
+		if (woken >= n && (newf == NULL || requeued >= n2))
+			break;
+
+		DPRINTF(("futex_wake: unrequeue f %p ref %d\n",
+		    f, f->f_refcount));
+		TAILQ_REMOVE(&f->f_requeue_proc, wp, wp_rqlist);
+		/* the caller still holds a reference, f cannot go away. */
+		futex_put_locked(f);
+
+		if (woken < n) {
+			wp->wp_new_futex = NULL;
+			woken++;
+		} else {
+			/* matching futex_put is done by the other thread. */
+			newf->f_refcount++;
+			wp->wp_new_futex = newf;
+			TAILQ_INSERT_TAIL(&newf->f_requeue_proc, wp, wp_rqlist);
+			requeued++;
+			DPRINTF(("futex_wake: rerequeue newf %p ref %d\n",
+			    newf, newf->f_refcount));
+		}
+	}
+
+	mtx_leave(&futex_lock);
+
+	return woken;
+}
+
+/*
+ * Perform the FUTEX_WAKE_OP read-modify-write encoded in encoded_op on the
+ * user word at uaddr, then compare the value the word held before the
+ * update with the encoded comparison argument.  Returns the comparison
+ * result (0 or 1), or a negated errno on failure.
+ */
+int
+futex_atomic_op(struct proc *p, int encoded_op, void *uaddr)
+{
+	const unsigned int enc = (unsigned int)encoded_op;
+	const int op = (enc >> 28) & 7;
+	const int cmp = (enc >> 24) & 15;
+	/* Both arguments are signed 12-bit fields; shift to sign-extend. */
+	const int cmparg = (int)(enc << 20) >> 20;
+	int oparg = (int)(enc << 8) >> 20;
+	int oldval, nval, tries;
+
+	/* Keep the shift count in range; 1 << 32 and beyond is undefined. */
+	if (enc & ((unsigned int)FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = (int)(1U << (oparg & 31));
+
+	/* XXX: linux verifies access here and returns EFAULT */
+
+	for (tries = 0; ; tries++) {
+		if (tries == FUTEX_UCAS_RETRIES)
+			return -EFAULT;
+
+		if (copyin(uaddr, &oldval, sizeof(oldval)) != 0)
+			return -EFAULT;
+
+		switch (op) {
+		case FUTEX_OP_SET:
+			nval = oparg;
+			break;
+		case FUTEX_OP_ADD:
+			nval = (int)((unsigned int)oldval + (unsigned int)oparg);
+			break;
+		case FUTEX_OP_OR:
+			nval = oldval | oparg;
+			break;
+		case FUTEX_OP_ANDN:
+			nval = oldval & ~oparg;
+			break;
+		case FUTEX_OP_XOR:
+			nval = oldval ^ oparg;
+			break;
+		default:
+			return -ENOSYS;
+		}
+
+		/*
+		 * atomic_ucas_32() only says whether the swap happened, so a
+		 * non-zero return sends us back to re-read the word; a bad
+		 * address is then caught by the copyin() above.
+		 */
+		if (atomic_ucas_32(uaddr, oldval, nval) == 0)
+			break;
+	}
+
+	switch (cmp) {
+	case FUTEX_OP_CMP_EQ:
+		return (oldval == cmparg);
+	case FUTEX_OP_CMP_NE:
+		return (oldval != cmparg);
+	case FUTEX_OP_CMP_LT:
+		return (oldval < cmparg);
+	case FUTEX_OP_CMP_GE:
+		return (oldval >= cmparg);
+	case FUTEX_OP_CMP_LE:
+		return (oldval <= cmparg);
+	case FUTEX_OP_CMP_GT:
+		return (oldval > cmparg);
+	default:
+		return -ENOSYS;
+	}
+}
+
+/* Record the robust futex list head that the calling thread registers. */
+int
+linux_sys_set_robust_list(struct proc *p, void *v, register_t *retval)
+{
+	struct linux_sys_set_robust_list_args /* {
+		syscallarg(struct linux_robust_list_head *) head;
+		syscallarg(size_t) len;
+	} */ *uap = v;
+	struct linux_emuldata *led;
+
+	if (SCARG(uap, len) != sizeof(struct linux_robust_list_head))
+		return EINVAL;
+	led = p->p_emuldata;
+	led->led_robust_head = SCARG(uap, head);
+	*retval = 0;
+	return 0;
+}
+
+/*
+ * Copy out the robust list head registered by thread pid (0 means the
+ * caller) together with the size of the head structure.  Only threads of
+ * the calling process may be queried.
+ */
+int
+linux_sys_get_robust_list(struct proc *p, void *v, register_t *retval)
+{
+	struct linux_sys_get_robust_list_args /* {
+		syscallarg(int) pid;
+		syscallarg(struct linux_robust_list_head **) head;
+		syscallarg(size_t *) len;
+	} */ *uap = v;
+	struct proc *q;
+	struct linux_emuldata *led;
+	struct linux_robust_list_head *head;
+	size_t len;
+	int error = 0;
+
+	if (!SCARG(uap, pid)) {
+		q = p;
+	} else {
+		q = pfind(SCARG(uap, pid));
+		if (q == NULL)
+			return ESRCH;
+		if (p != q && p->p_p != q->p_p)
+			return EPERM;
+	}
+	/* Read the target thread's emuldata, not the caller's. */
+	led = q->p_emuldata;
+	head = led->led_robust_head;
+
+	len = sizeof(*head);
+	error = copyout(&len, SCARG(uap, len), sizeof(len));
+	if (error)
+		return error;
+	return copyout(&head, SCARG(uap, head), sizeof(head));
+}
+
+/*
+ * Compute number of ticks in the specified amount of time.
+ */
+int
+tstohz(const struct timespec *ts)
+{
+	struct timeval tv;
+
+	/*
+	 * usec has great enough resolution for hz, so convert to a
+	 * timeval and use tvtohz().
+	 */
+	TIMESPEC_TO_TIMEVAL(&tv, ts);
+	return tvtohz(&tv);
+}
+
+/*
+ * Validate ts and round a non-zero interval shorter than one clock tick
+ * up to a full tick.  Returns EINVAL for negative or denormalized values.
+ */
+int
+itimespecfix(struct timespec *ts)
+{
+	if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
+		return EINVAL;
+	if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < tick * 1000)
+		ts->tv_nsec = tick * 1000;
+	return 0;
+}
+
Index: linux_futex.h
===================================================================
RCS file: linux_futex.h
diff -N linux_futex.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ linux_futex.h	8 Jul 2011 21:02:07 -0000
@@ -0,0 +1,77 @@
+/*	$OpenBSD$	*/
+/*	$NetBSD: linux_futex.h,v 1.4 2010/07/07 01:30:35 chs Exp $	*/
+
+/*-
+ * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by Emmanuel Dreyfus
+ * 4. 
The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_FUTEX_H_
+#define _LINUX_FUTEX_H_
+
+#define LINUX_FUTEX_WAIT		0	/* sleep if *uaddr == val */
+#define LINUX_FUTEX_WAKE		1	/* wake waiters on uaddr */
+#define LINUX_FUTEX_FD			2	/* rejected with ENOSYS */
+#define LINUX_FUTEX_REQUEUE		3	/* wake some, move rest to uaddr2 */
+#define LINUX_FUTEX_CMP_REQUEUE		4	/* same, only if *uaddr == val3 */
+#define LINUX_FUTEX_WAKE_OP		5	/* modify *uaddr2, then wake */
+
+#define LINUX_FUTEX_PRIVATE_FLAG	128	/* masked off the op, ignored */
+
+#define FUTEX_OP_SET		0	/* new = oparg */
+#define FUTEX_OP_ADD		1	/* new = old + oparg */
+#define FUTEX_OP_OR		2	/* new = old | oparg */
+#define FUTEX_OP_ANDN		3	/* new = old & ~oparg */
+#define FUTEX_OP_XOR		4	/* new = old ^ oparg */
+
+#define FUTEX_OP_OPARG_SHIFT	8	/* op flag: use 1 << oparg */
+
+#define FUTEX_OP_CMP_EQ		0	/* old == cmparg */
+#define FUTEX_OP_CMP_NE		1	/* old != cmparg */
+#define FUTEX_OP_CMP_LT		2	/* old < cmparg */
+#define FUTEX_OP_CMP_LE		3	/* old <= cmparg */
+#define FUTEX_OP_CMP_GT		4	/* old > cmparg */
+#define FUTEX_OP_CMP_GE		5	/* old >= cmparg */
+
+struct linux_robust_list {
+	struct linux_robust_list *next;
+};
+
+struct linux_robust_list_head {	/* size is checked by set_robust_list */
+	struct linux_robust_list list;
+	unsigned long futex_offset;
+	struct linux_robust_list *pending_list;
+};
+
+#define FUTEX_WAITERS		0x80000000
+#define FUTEX_OWNER_DIED	0x40000000
+#define FUTEX_TID_MASK		0x3fffffff
+
+
+#endif /* !_LINUX_FUTEX_H_ */
Index: linux_syscall.h
===================================================================
RCS file: 
/cvs/src/sys/compat/linux/linux_syscall.h,v retrieving revision 1.62 diff -u -p -r1.62 linux_syscall.h --- linux_syscall.h 8 Jul 2011 19:19:59 -0000 1.62 +++ linux_syscall.h 8 Jul 2011 21:02:07 -0000 @@ -718,6 +718,9 @@ /* syscall: "fremovexattr" ret: "int" args: */ #define LINUX_SYS_fremovexattr 237 +/* syscall: "futex" ret: "int" args: "int *" "int" "int" "const struct linux_timespec *" "int *" "int" */ +#define LINUX_SYS_futex 240 + /* syscall: "set_thread_area" ret: "int" args: "struct l_segment_descriptor *" */ #define LINUX_SYS_set_thread_area 243 @@ -738,5 +741,11 @@ /* syscall: "clock_getres" ret: "int" args: "clockid_t" "struct l_timespec *" */ #define LINUX_SYS_clock_getres 266 + +/* syscall: "set_robust_list" ret: "int" args: "struct linux_robust_list_head *" "size_t" */ +#define LINUX_SYS_set_robust_list 311 + +/* syscall: "get_robust_list" ret: "int" args: "int" "struct linux_robust_list_head **" "size_t *" */ +#define LINUX_SYS_get_robust_list 312 #define LINUX_SYS_MAXSYSCALL 313 Index: linux_syscallargs.h =================================================================== RCS file: /cvs/src/sys/compat/linux/linux_syscallargs.h,v retrieving revision 1.64 diff -u -p -r1.64 linux_syscallargs.h --- linux_syscallargs.h 8 Jul 2011 19:19:59 -0000 1.64 +++ linux_syscallargs.h 8 Jul 2011 21:02:07 -0000 @@ -564,6 +564,15 @@ struct linux_sys_fcntl64_args { syscallarg(void *) arg; }; +struct linux_sys_futex_args { + syscallarg(int *) uaddr; + syscallarg(int) op; + syscallarg(int) val; + syscallarg(const struct linux_timespec *) timeout; + syscallarg(int *) uaddr2; + syscallarg(int) val3; +}; + struct linux_sys_set_thread_area_args { syscallarg(struct l_segment_descriptor *) desc; }; @@ -586,6 +595,17 @@ struct linux_sys_clock_getres_args { syscallarg(struct l_timespec *) tp; }; +struct linux_sys_set_robust_list_args { + syscallarg(struct linux_robust_list_head *) head; + syscallarg(size_t) len; +}; + +struct linux_sys_get_robust_list_args { + syscallarg(int) 
pid; + syscallarg(struct linux_robust_list_head **) head; + syscallarg(size_t *) len; +}; + /* * System call prototypes. */ @@ -842,6 +862,7 @@ int linux_sys_flistxattr(struct proc *, int linux_sys_removexattr(struct proc *, void *, register_t *); int linux_sys_lremovexattr(struct proc *, void *, register_t *); int linux_sys_fremovexattr(struct proc *, void *, register_t *); +int linux_sys_futex(struct proc *, void *, register_t *); int linux_sys_set_thread_area(struct proc *, void *, register_t *); int linux_sys_get_thread_area(struct proc *, void *, register_t *); int linux_sys_fadvise64(struct proc *, void *, register_t *); @@ -849,3 +870,5 @@ int sys_exit(struct proc *, void *, regi int linux_sys_set_tid_address(struct proc *, void *, register_t *); int linux_sys_clock_gettime(struct proc *, void *, register_t *); int linux_sys_clock_getres(struct proc *, void *, register_t *); +int linux_sys_set_robust_list(struct proc *, void *, register_t *); +int linux_sys_get_robust_list(struct proc *, void *, register_t *); Index: linux_syscalls.c =================================================================== RCS file: /cvs/src/sys/compat/linux/linux_syscalls.c,v retrieving revision 1.62 diff -u -p -r1.62 linux_syscalls.c --- linux_syscalls.c 8 Jul 2011 19:19:59 -0000 1.62 +++ linux_syscalls.c 8 Jul 2011 21:02:08 -0000 @@ -268,7 +268,7 @@ char *linux_syscallnames[] = { "fremovexattr", /* 237 = fremovexattr */ "#238 (unimplemented linux_sys_tkill)", /* 238 = unimplemented linux_sys_tkill */ "#239 (unimplemented linux_sys_sendfile64)", /* 239 = unimplemented linux_sys_sendfile64 */ - "#240 (unimplemented linux_sys_futex)", /* 240 = unimplemented linux_sys_futex */ + "futex", /* 240 = futex */ "#241 (unimplemented linux_sys_sched_setaffinity)", /* 241 = unimplemented linux_sys_sched_setaffinity */ "#242 (unimplemented linux_sys_sched_getaffinity)", /* 242 = unimplemented linux_sys_sched_getaffinity */ "set_thread_area", /* 243 = set_thread_area */ @@ -339,6 +339,6 @@ 
char *linux_syscallnames[] = { "#308 (unimplemented linux_sys_pselect6)", /* 308 = unimplemented linux_sys_pselect6 */ "#309 (unimplemented linux_sys_ppoll)", /* 309 = unimplemented linux_sys_ppoll */ "#310 (unimplemented linux_sys_unshare)", /* 310 = unimplemented linux_sys_unshare */ - "#311 (unimplemented linux_sys_set_robust_list)", /* 311 = unimplemented linux_sys_set_robust_list */ - "#312 (unimplemented linux_sys_get_robust_list)", /* 312 = unimplemented linux_sys_get_robust_list */ + "set_robust_list", /* 311 = set_robust_list */ + "get_robust_list", /* 312 = get_robust_list */ }; Index: linux_sysent.c =================================================================== RCS file: /cvs/src/sys/compat/linux/linux_sysent.c,v retrieving revision 1.63 diff -u -p -r1.63 linux_sysent.c --- linux_sysent.c 8 Jul 2011 19:19:59 -0000 1.63 +++ linux_sysent.c 8 Jul 2011 21:02:08 -0000 @@ -526,8 +526,8 @@ struct sysent linux_sysent[] = { sys_nosys }, /* 238 = unimplemented linux_sys_tkill */ { 0, 0, 0, sys_nosys }, /* 239 = unimplemented linux_sys_sendfile64 */ - { 0, 0, 0, - sys_nosys }, /* 240 = unimplemented linux_sys_futex */ + { 6, s(struct linux_sys_futex_args), 0, + linux_sys_futex }, /* 240 = futex */ { 0, 0, 0, sys_nosys }, /* 241 = unimplemented linux_sys_sched_setaffinity */ { 0, 0, 0, @@ -668,9 +668,9 @@ struct sysent linux_sysent[] = { sys_nosys }, /* 309 = unimplemented linux_sys_ppoll */ { 0, 0, 0, sys_nosys }, /* 310 = unimplemented linux_sys_unshare */ - { 0, 0, 0, - sys_nosys }, /* 311 = unimplemented linux_sys_set_robust_list */ - { 0, 0, 0, - sys_nosys }, /* 312 = unimplemented linux_sys_get_robust_list */ + { 2, s(struct linux_sys_set_robust_list_args), 0, + linux_sys_set_robust_list }, /* 311 = set_robust_list */ + { 3, s(struct linux_sys_get_robust_list_args), 0, + linux_sys_get_robust_list }, /* 312 = get_robust_list */ }; Index: syscalls.master =================================================================== RCS file: 
/cvs/src/sys/compat/linux/syscalls.master,v retrieving revision 1.59 diff -u -p -r1.59 syscalls.master --- syscalls.master 8 Jul 2011 19:19:20 -0000 1.59 +++ syscalls.master 8 Jul 2011 21:02:08 -0000 @@ -379,7 +379,9 @@ 237 NOARGS { int linux_sys_fremovexattr(void); } 238 UNIMPL linux_sys_tkill 239 UNIMPL linux_sys_sendfile64 -240 UNIMPL linux_sys_futex +240 STD { int linux_sys_futex(int *uaddr, int op, int val, \ + const struct linux_timespec *timeout, \ + int *uaddr2, int val3); } 241 UNIMPL linux_sys_sched_setaffinity 242 UNIMPL linux_sys_sched_getaffinity 243 STD { int linux_sys_set_thread_area( \ @@ -454,5 +456,9 @@ 308 UNIMPL linux_sys_pselect6 309 UNIMPL linux_sys_ppoll 310 UNIMPL linux_sys_unshare -311 UNIMPL linux_sys_set_robust_list -312 UNIMPL linux_sys_get_robust_list +311 STD { int linux_sys_set_robust_list( \ + struct linux_robust_list_head *head, size_t len); } +312 STD { int linux_sys_get_robust_list(int pid, \ + struct linux_robust_list_head **head, \ + size_t *len); } +