> Date: Sat, 2 Jun 2018 12:42:14 +0200 (CEST)
> From: Mark Kettenis <[email protected]>
>
> > Date: Sat, 2 Jun 2018 12:04:16 +0200
> > From: Martin Pieuchot <[email protected]>
> >
> > On 01/06/18(Fri) 19:13, Mark Kettenis wrote:
> > > The diff below changes our futex implementation to allow futexes that
> > > are shared between processes. Such futexes are a little bit tricky
> > > since there is no guarantee that a futex is mapped at the same address
> > > in each process. To solve tis issue I adopted the strategy chosen by
> > > FreeBSD. Shared memory objects have a uvm object associated with
> > > them. So instead of using the virtual address I can use the offset
> > > into that object. Together with the a pointer to the object itself
> > > that should form a unique ID for the futex.
> >
> > Nice.
> >
> > > In order to establish that we're dealing with shared memory we need to
> > > lookup the mapping corresponding to the userspace address of the
> > > futex. That involves locking the map. If we are worried about the
> > > additional overhead we could introduce shared versions of the futex
> > > operations and skip the lookup for non-shared (process-private) futexes.
> > >
> > > These futexes could be used to implement support for the POSIX Thread
> > > Process-Shared Synchronization option, i.e. the PTHREAD_PROCESS_SHARED
> > > attribute for mutexes. But I need these to make libxshmfence work on
> > > OpenBSD, which is in turn needed to get DRI3 support working.
> > >
> > > Making DRI3 support work is somewhat important. We're currently using
> > > DRI2 but Linux has moved to DRI3 and some of the DRI2-specific
> > > codepaths in X are starting to rot. DRI3 is also supposed to be more
> > > secure than DRI2. With DRI2, applications that can guess a 32-bit
> > > handle can in principle access graphiucs buffers from other
> > > applications. With DRI3 graphics buffers are shared using file
> > > descriptor passing which means that only processes that were passed
> > > the file descriptor can access the graphics buffer.
> > >
> > > Another feature of DRI3 is that graphics buffers can be shared between
> > > graphics devices. The idea there is that a GPU can render into a
> > > buffer which is then shared with the Intel CPU graphics which can then
> > > display it on the screen. But my initial implementation won't support
> > > that.
> > >
> > > Thoughts? Especially on whether we want to distinguish between
> > > process-shared and process-private futexes!
> >
> > I'd prefer if we could do the distinction. Would it be enough to pass
> > a flag to futex_get()?
>
> More or less. The question is what what we should do if someone asks
> for a process-shared futex in memory that isn't shared. Should we
> automatically create a process-private futex? Or should this result
> in an error?
>
> I suspect that people expect PTHREAD_PROCESS_SHARED mutexes to work
> even if they're not created in shared memory. So a failure to create
> a process-shared futex should probably fall through into the
> process-private case.
>
> Turns out Linux already has a FUTEX_PRIVATE_FLAG and
> FUTEX_WAIT_PRIVATE, FUTEX_WAKE_PRIVATE and FUTEX_REQUEUE_PRIVATE
> operations. So I'll just stick to that API.
So here is an implementation of that APU. After this gets committed
we can change libc to use the _PRIVATE versions of the mutex
operations. I already verified that that works.
ok?
Index: kern/sys_futex.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_futex.c,v
retrieving revision 1.7
diff -u -p -r1.7 sys_futex.c
--- kern/sys_futex.c 24 Apr 2018 17:19:35 -0000 1.7
+++ kern/sys_futex.c 2 Jun 2018 16:36:31 -0000
@@ -31,6 +31,8 @@
#include <sys/ktrace.h>
#endif
+#include <uvm/uvm.h>
+
/*
* Atomicity is only needed on MULTIPROCESSOR kernels. Fall back on
* copyin(9) until non-MULTIPROCESSOR architectures have a copyin32(9)
@@ -46,21 +48,23 @@
struct futex {
LIST_ENTRY(futex) ft_list; /* list of all futexes */
TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */
- uint32_t *ft_uaddr; /* userspace address */
+ struct uvm_object *ft_obj; /* UVM object */
+ voff_t ft_off; /* UVM offset */
pid_t ft_pid; /* process identifier */
unsigned int ft_refcnt; /* # of references */
};
/* Syscall helpers. */
-int futex_wait(uint32_t *, uint32_t, const struct timespec *);
-int futex_wake(uint32_t *, uint32_t);
-int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t);
+int futex_wait(uint32_t *, uint32_t, const struct timespec *, int);
+int futex_wake(uint32_t *, uint32_t, int);
+int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int);
/* Flags for futex_get(). */
#define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */
+#define FT_PRIVATE 0x2 /* Futex is process-private. */
-struct futex *futex_get(uint32_t *, int);
-void futex_put(struct futex *);
+struct futex *futex_get(uint32_t *, int);
+void futex_put(struct futex *);
/*
* The global futex lock serialize futex(2) calls such that no wakeup
@@ -94,23 +98,30 @@ sys_futex(struct proc *p, void *v, regis
uint32_t val = SCARG(uap, val);
const struct timespec *timeout = SCARG(uap, timeout);
void *g = SCARG(uap, g);
+ int flags = 0;
+
+ if (op & FUTEX_PRIVATE_FLAG)
+ flags |= FT_PRIVATE;
switch (op) {
case FUTEX_WAIT:
+ case FUTEX_WAIT_PRIVATE:
KERNEL_LOCK();
rw_enter_write(&ftlock);
- *retval = futex_wait(uaddr, val, timeout);
+ *retval = futex_wait(uaddr, val, timeout, flags);
rw_exit_write(&ftlock);
KERNEL_UNLOCK();
break;
case FUTEX_WAKE:
+ case FUTEX_WAKE_PRIVATE:
rw_enter_write(&ftlock);
- *retval = futex_wake(uaddr, val);
+ *retval = futex_wake(uaddr, val, flags);
rw_exit_write(&ftlock);
break;
case FUTEX_REQUEUE:
+ case FUTEX_REQUEUE_PRIVATE:
rw_enter_write(&ftlock);
- *retval = futex_requeue(uaddr, val, g, (unsigned long)timeout);
+ *retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags);
rw_exit_write(&ftlock);
break;
default:
@@ -127,29 +138,47 @@ sys_futex(struct proc *p, void *v, regis
* If such futex does not exist and FT_CREATE is given, create it.
*/
struct futex *
-futex_get(uint32_t *uaddr, int flag)
+futex_get(uint32_t *uaddr, int flags)
{
struct proc *p = curproc;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ vm_map_entry_t entry;
+ struct uvm_object *obj = NULL;
+ voff_t off = (vaddr_t)uaddr;
+ pid_t pid = p->p_p->ps_pid;
struct futex *f;
rw_assert_wrlock(&ftlock);
+ if (!(flags & FT_PRIVATE)) {
+ vm_map_lock_read(map);
+ if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) &&
+ UVM_ET_ISOBJ(entry) && entry->object.uvm_obj &&
+ entry->inheritance == MAP_INHERIT_SHARE) {
+ obj = entry->object.uvm_obj;
+ off = entry->offset + ((vaddr_t)uaddr - entry->start);
+ pid = 0;
+ }
+ vm_map_unlock_read(map);
+ }
+
LIST_FOREACH(f, &ftlist, ft_list) {
- if (f->ft_uaddr == uaddr && f->ft_pid == p->p_p->ps_pid) {
+ if (f->ft_obj == obj && f->ft_off == off && f->ft_pid == pid) {
f->ft_refcnt++;
break;
}
}
- if ((f == NULL) && (flag & FT_CREATE)) {
+ if ((f == NULL) && (flags & FT_CREATE)) {
/*
* We rely on the rwlock to ensure that no other thread
* create the same futex.
*/
f = pool_get(&ftpool, PR_WAITOK);
TAILQ_INIT(&f->ft_threads);
- f->ft_uaddr = uaddr;
- f->ft_pid = p->p_p->ps_pid;
+ f->ft_obj = obj;
+ f->ft_off = off;
+ f->ft_pid = pid;
f->ft_refcnt = 1;
LIST_INSERT_HEAD(&ftlist, f, ft_list);
}
@@ -181,7 +210,8 @@ futex_put(struct futex *f)
* indefinitly if the argument is NULL.
*/
int
-futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout)
+futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout,
+ int flags)
{
struct proc *p = curproc;
struct futex *f;
@@ -220,7 +250,7 @@ futex_wait(uint32_t *uaddr, uint32_t val
to_ticks = INT_MAX;
}
- f = futex_get(uaddr, FT_CREATE);
+ f = futex_get(uaddr, flags | FT_CREATE);
TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link);
p->p_futex = f;
@@ -251,7 +281,8 @@ futex_wait(uint32_t *uaddr, uint32_t val
* address ``uaddr2''.
*/
int
-futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m)
+futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m,
+ int flags)
{
struct futex *f, *g;
struct proc *p;
@@ -259,7 +290,7 @@ futex_requeue(uint32_t *uaddr, uint32_t
rw_assert_wrlock(&ftlock);
- f = futex_get(uaddr, 0);
+ f = futex_get(uaddr, flags);
if (f == NULL)
return 0;
@@ -288,7 +319,7 @@ futex_requeue(uint32_t *uaddr, uint32_t
* ``uaddr''.
*/
int
-futex_wake(uint32_t *uaddr, uint32_t n)
+futex_wake(uint32_t *uaddr, uint32_t n, int flags)
{
- return futex_requeue(uaddr, n, NULL, 0);
+ return futex_requeue(uaddr, n, NULL, 0, flags);
}
Index: sys/futex.h
===================================================================
RCS file: /cvs/src/sys/sys/futex.h,v
retrieving revision 1.1
diff -u -p -r1.1 futex.h
--- sys/futex.h 28 Apr 2017 13:50:55 -0000 1.1
+++ sys/futex.h 2 Jun 2018 16:36:32 -0000
@@ -32,4 +32,10 @@ __END_DECLS
#define FUTEX_WAKE 2
#define FUTEX_REQUEUE 3
+#define FUTEX_PRIVATE_FLAG 128
+
+#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
+#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG)
+
#endif /* _SYS_FUTEX_H_ */