On Sat, Apr 04, 2020 at 03:53:50PM +0300, Paul Irofti wrote:
> > The real problem is that futex(2) is actually 3 different syscalls wrapped
> > into one. If it was split into three then kdump could properly report
> > futex_wake(2) and futex_requeue(2) as returning a count, while
> > futex_wait(2) returns an errno. The existing 'switch' in sys_futex()
> > would just move to userspace's futex(3), provided for linux compat.
>
> I have such a diff from half a year ago. Let me get it back in shape and
> I'll send it back here.
I tried diffing sys and lib/libc at once, but CVS cannot handle that, and
diffing the whole src tree took forever. So I am sending a separate diff
for each.
When booting I get a warning from ld.so that it cannot find the
libc_futex_{wait,wake,requeue} symbols, and I don't know how to fix that.
Perhaps making a release would fix it, but I cannot do that right now
(not enough disk space left).
This has not been tested enough and will probably blow up your
computer, so use it with care! Reports are welcome though :)
Here are the kernel bits:
%-----------------------------------------------------------------------
Index: kern/init_sysent.c
===================================================================
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.218
diff -u -p -u -p -r1.218 init_sysent.c
--- kern/init_sysent.c 18 Mar 2020 19:35:00 -0000 1.218
+++ kern/init_sysent.c 4 Apr 2020 14:48:40 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_sysent.c,v 1.218 2020/03/18 19:35:00 anton Exp $ */
+/* $OpenBSD$ */
/*
* System call switch table.
@@ -198,8 +198,8 @@ struct sysent sysent[] = {
sys_getpgrp }, /* 81 = getpgrp */
{ 2, s(struct sys_setpgid_args), 0,
sys_setpgid }, /* 82 = setpgid */
- { 5, s(struct sys_futex_args), SY_NOLOCK | 0,
- sys_futex }, /* 83 = futex */
+ { 5, s(struct sys_ofutex_args), SY_NOLOCK | 0,
+ sys_ofutex }, /* 83 = ofutex */
{ 4, s(struct sys_utimensat_args), 0,
sys_utimensat }, /* 84 = utimensat */
{ 2, s(struct sys_futimens_args), 0,
@@ -751,5 +751,11 @@ struct sysent sysent[] = {
sys___set_tcb }, /* 329 = __set_tcb */
{ 0, 0, SY_NOLOCK | 0,
sys___get_tcb }, /* 330 = __get_tcb */
+ { 4, s(struct sys_futex_wait_args), SY_NOLOCK | 0,
+ sys_futex_wait }, /* 331 = futex_wait */
+ { 3, s(struct sys_futex_wake_args), SY_NOLOCK | 0,
+ sys_futex_wake }, /* 332 = futex_wake */
+ { 5, s(struct sys_futex_requeue_args), SY_NOLOCK | 0,
+ sys_futex_requeue }, /* 333 = futex_requeue */
};
Index: kern/kern_pledge.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_pledge.c,v
retrieving revision 1.261
diff -u -p -u -p -r1.261 kern_pledge.c
--- kern/kern_pledge.c 15 Feb 2020 09:35:48 -0000 1.261
+++ kern/kern_pledge.c 4 Apr 2020 14:48:40 -0000
@@ -266,7 +266,10 @@ const uint64_t pledge_syscalls[SYS_MAXSY
*/
[SYS___tfork] = PLEDGE_STDIO,
[SYS_sched_yield] = PLEDGE_STDIO,
- [SYS_futex] = PLEDGE_STDIO,
+ [SYS_ofutex] = PLEDGE_STDIO,
+ [SYS_futex_wait] = PLEDGE_STDIO,
+ [SYS_futex_wake] = PLEDGE_STDIO,
+ [SYS_futex_requeue] = PLEDGE_STDIO,
[SYS___thrsleep] = PLEDGE_STDIO,
[SYS___thrwakeup] = PLEDGE_STDIO,
[SYS___threxit] = PLEDGE_STDIO,
Index: kern/sys_futex.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_futex.c,v
retrieving revision 1.15
diff -u -p -u -p -r1.15 sys_futex.c
--- kern/sys_futex.c 20 Mar 2020 17:17:31 -0000 1.15
+++ kern/sys_futex.c 4 Apr 2020 14:48:40 -0000
@@ -83,9 +83,74 @@ futex_init(void)
}
int
-sys_futex(struct proc *p, void *v, register_t *retval)
+sys_futex_wait(struct proc *p, void *v, register_t *retval)
+{ /* Syscall handler: unpack the args from v and run futex_wait(). */
+ struct sys_futex_wait_args /* {
+ syscallarg(uint32_t *) f;
+ syscallarg(int) val;
+ syscallarg(const struct timespec *) timeout;
+ syscallarg(int) flags;
+ } */ *uap = v;
+ uint32_t *uaddr = SCARG(uap, f);
+ uint32_t val = SCARG(uap, val); /* int syscall arg held as uint32_t */
+ const struct timespec *timeout = SCARG(uap, timeout);
+ int flags = SCARG(uap, flags);
+
+ KERNEL_LOCK(); /* sys_futex_wake/sys_futex_requeue do not take this */
+ rw_enter_write(&ftlock);
+ *retval = futex_wait(uaddr, val, timeout, flags); /* result via *retval */
+ rw_exit_write(&ftlock);
+ KERNEL_UNLOCK();
+
+ return 0; /* the handler itself always returns 0 */
+}
+
+int
+sys_futex_wake(struct proc *p, void *v, register_t *retval)
+{ /* Syscall handler: unpack the args from v and run futex_wake(). */
+ struct sys_futex_wake_args /* {
+ syscallarg(uint32_t *) f;
+ syscallarg(int) val;
+ syscallarg(int) flags;
+ } */ *uap = v;
+ uint32_t *uaddr = SCARG(uap, f);
+ uint32_t val = SCARG(uap, val); /* int syscall arg held as uint32_t */
+ int flags = SCARG(uap, flags);
+
+ rw_enter_write(&ftlock); /* ftlock is write-held around the call */
+ *retval = futex_wake(uaddr, val, flags); /* result via *retval */
+ rw_exit_write(&ftlock);
+
+ return 0; /* the handler itself always returns 0 */
+}
+
+int
+sys_futex_requeue(struct proc *p, void *v, register_t *retval)
+{ /* Syscall handler: unpack the args from v and run futex_requeue(). */
+ struct sys_futex_requeue_args /* {
+ syscallarg(uint32_t *) f;
+ syscallarg(int) val;
+ syscallarg(uint32_t *) g;
+ syscallarg(const struct timespec *) timeout;
+ syscallarg(int) flags;
+ } */ *uap = v;
+ uint32_t *uaddr = SCARG(uap, f);
+ uint32_t val = SCARG(uap, val); /* int syscall arg held as uint32_t */
+ const struct timespec *timeout = SCARG(uap, timeout);
+ void *g = SCARG(uap, g); /* declared uint32_t * in the args, kept as void * */
+ int flags = SCARG(uap, flags);
+
+ rw_enter_write(&ftlock); /* ftlock is write-held around the call */
+ *retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags); /* timeout is passed as an integer; NOTE(review): _requeue() in synch.h puts its count m in this slot */
+ rw_exit_write(&ftlock);
+
+ return 0; /* the handler itself always returns 0 */
+}
+
+int
+sys_ofutex(struct proc *p, void *v, register_t *retval)
+{
+ struct sys_ofutex_args /* {
syscallarg(uint32_t *) f;
syscallarg(int) op;
syscallarg(inr) val;
Index: kern/syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.c,v
retrieving revision 1.217
diff -u -p -u -p -r1.217 syscalls.c
--- kern/syscalls.c 18 Mar 2020 19:35:00 -0000 1.217
+++ kern/syscalls.c 4 Apr 2020 14:48:40 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscalls.c,v 1.217 2020/03/18 19:35:00 anton Exp $ */
+/* $OpenBSD$ */
/*
* System call names.
@@ -103,7 +103,7 @@ char *syscallnames[] = {
"setgroups", /* 80 = setgroups */
"getpgrp", /* 81 = getpgrp */
"setpgid", /* 82 = setpgid */
- "futex", /* 83 = futex */
+ "ofutex", /* 83 = ofutex */
"utimensat", /* 84 = utimensat */
"futimens", /* 85 = futimens */
"kbind", /* 86 = kbind */
@@ -393,4 +393,7 @@ char *syscallnames[] = {
"#328 (obsolete __tfork51)", /* 328 = obsolete __tfork51 */
"__set_tcb", /* 329 = __set_tcb */
"__get_tcb", /* 330 = __get_tcb */
+ "futex_wait", /* 331 = futex_wait */
+ "futex_wake", /* 332 = futex_wake */
+ "futex_requeue", /* 333 = futex_requeue */
};
Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.207
diff -u -p -u -p -r1.207 syscalls.master
--- kern/syscalls.master 18 Mar 2020 19:33:36 -0000 1.207
+++ kern/syscalls.master 4 Apr 2020 14:48:40 -0000
@@ -186,7 +186,7 @@
const gid_t *gidset); }
81 STD { int sys_getpgrp(void); }
82 STD { int sys_setpgid(pid_t pid, pid_t pgid); }
-83 STD NOLOCK { int sys_futex(uint32_t *f, int op, int val, \
+83 STD NOLOCK { int sys_ofutex(uint32_t *f, int op, int val, \
const struct timespec *timeout, uint32_t *g); }
84 STD { int sys_utimensat(int fd, const char *path, \
const struct timespec *times, int flag); }
@@ -566,3 +566,9 @@
328 OBSOL __tfork51
329 STD NOLOCK { void sys___set_tcb(void *tcb); }
330 STD NOLOCK { void *sys___get_tcb(void); }
+331 STD NOLOCK { int sys_futex_wait(uint32_t *f, int val, \
+ const struct timespec *timeout, int flags); }
+332 STD NOLOCK { int sys_futex_wake(uint32_t *f, int val, int flags); }
+333 STD NOLOCK { int sys_futex_requeue(uint32_t *f, int val, \
+ uint32_t *g, const struct timespec *timeout, \
+ int flags); }
Index: sys/futex.h
===================================================================
RCS file: /cvs/src/sys/sys/futex.h,v
retrieving revision 1.2
diff -u -p -u -p -r1.2 futex.h
--- sys/futex.h 3 Jun 2018 15:09:26 -0000 1.2
+++ sys/futex.h 4 Apr 2020 14:48:40 -0000
@@ -23,10 +23,18 @@
#include <sys/cdefs.h>
__BEGIN_DECLS
-int futex(volatile uint32_t *, int, int, const struct timespec *,
- volatile uint32_t *);
+/* int futex(volatile uint32_t *, int, int, const struct timespec *,
+ volatile uint32_t *); */
+int futex_wait(uint32_t *f, int val, const struct timespec *timeout,
+ int flags);
+int futex_wake(uint32_t *f, int val, int flags);
+int futex_requeue(uint32_t *f, int val, uint32_t *g,
+ const struct timespec *timeout, int flags);
__END_DECLS
+
#endif /* ! _KERNEL */
+
+#define FT_PRIVATE 0x2 /* Futex is process-private. */
#define FUTEX_WAIT 1
#define FUTEX_WAKE 2
Index: sys/syscall.h
===================================================================
RCS file: /cvs/src/sys/sys/syscall.h,v
retrieving revision 1.215
diff -u -p -u -p -r1.215 syscall.h
--- sys/syscall.h 18 Mar 2020 19:35:00 -0000 1.215
+++ sys/syscall.h 4 Apr 2020 14:48:40 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscall.h,v 1.215 2020/03/18 19:35:00 anton Exp $ */
+/* $OpenBSD$ */
/*
* System call numbers.
@@ -251,8 +251,8 @@
/* syscall: "setpgid" ret: "int" args: "pid_t" "pid_t" */
#define SYS_setpgid 82
-/* syscall: "futex" ret: "int" args: "uint32_t *" "int" "int" "const struct
timespec *" "uint32_t *" */
-#define SYS_futex 83
+/* syscall: "ofutex" ret: "int" args: "uint32_t *" "int" "int" "const struct
timespec *" "uint32_t *" */
+#define SYS_ofutex 83
/* syscall: "utimensat" ret: "int" args: "int" "const char *" "const struct
timespec *" "int" */
#define SYS_utimensat 84
@@ -705,4 +705,13 @@
/* syscall: "__get_tcb" ret: "void *" args: */
#define SYS___get_tcb 330
-#define SYS_MAXSYSCALL 331
+/* syscall: "futex_wait" ret: "int" args: "uint32_t *" "int" "const struct
timespec *" "int" */
+#define SYS_futex_wait 331
+
+/* syscall: "futex_wake" ret: "int" args: "uint32_t *" "int" "int" */
+#define SYS_futex_wake 332
+
+/* syscall: "futex_requeue" ret: "int" args: "uint32_t *" "int" "uint32_t *"
"const struct timespec *" "int" */
+#define SYS_futex_requeue 333
+
+#define SYS_MAXSYSCALL 334
Index: sys/syscallargs.h
===================================================================
RCS file: /cvs/src/sys/sys/syscallargs.h,v
retrieving revision 1.218
diff -u -p -u -p -r1.218 syscallargs.h
--- sys/syscallargs.h 18 Mar 2020 19:35:00 -0000 1.218
+++ sys/syscallargs.h 4 Apr 2020 14:48:40 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscallargs.h,v 1.218 2020/03/18 19:35:00 anton Exp $ */
+/* $OpenBSD$ */
/*
* System call argument lists.
@@ -390,7 +390,7 @@ struct sys_setpgid_args {
syscallarg(pid_t) pgid;
};
-struct sys_futex_args {
+struct sys_ofutex_args {
syscallarg(uint32_t *) f;
syscallarg(int) op;
syscallarg(int) val;
@@ -1109,6 +1109,27 @@ struct sys___set_tcb_args {
syscallarg(void *) tcb;
};
+struct sys_futex_wait_args {
+ syscallarg(uint32_t *) f;
+ syscallarg(int) val;
+ syscallarg(const struct timespec *) timeout;
+ syscallarg(int) flags;
+};
+
+struct sys_futex_wake_args {
+ syscallarg(uint32_t *) f;
+ syscallarg(int) val;
+ syscallarg(int) flags;
+};
+
+struct sys_futex_requeue_args {
+ syscallarg(uint32_t *) f;
+ syscallarg(int) val;
+ syscallarg(uint32_t *) g;
+ syscallarg(const struct timespec *) timeout;
+ syscallarg(int) flags;
+};
+
/*
* System call prototypes.
*/
@@ -1202,7 +1223,7 @@ int sys_getgroups(struct proc *, void *,
int sys_setgroups(struct proc *, void *, register_t *);
int sys_getpgrp(struct proc *, void *, register_t *);
int sys_setpgid(struct proc *, void *, register_t *);
-int sys_futex(struct proc *, void *, register_t *);
+int sys_ofutex(struct proc *, void *, register_t *);
int sys_utimensat(struct proc *, void *, register_t *);
int sys_futimens(struct proc *, void *, register_t *);
int sys_kbind(struct proc *, void *, register_t *);
@@ -1360,3 +1381,6 @@ int sys_symlinkat(struct proc *, void *,
int sys_unlinkat(struct proc *, void *, register_t *);
int sys___set_tcb(struct proc *, void *, register_t *);
int sys___get_tcb(struct proc *, void *, register_t *);
+int sys_futex_wait(struct proc *, void *, register_t *);
+int sys_futex_wake(struct proc *, void *, register_t *);
+int sys_futex_requeue(struct proc *, void *, register_t *);
%-----------------------------------------------------------------------
And here are the libc bits:
%-----------------------------------------------------------------------
Index: Symbols.list
===================================================================
RCS file: /cvs/src/lib/libc/Symbols.list,v
retrieving revision 1.73
diff -u -p -u -p -r1.73 Symbols.list
--- Symbols.list 24 Oct 2019 05:57:41 -0000 1.73
+++ Symbols.list 4 Apr 2020 14:49:55 -0000
@@ -86,7 +86,10 @@ _thread_sys_fstatat
_thread_sys_fstatfs
_thread_sys_fsync
_thread_sys_ftruncate
-_thread_sys_futex
+_thread_sys_ofutex
+_thread_sys_futex_wait
+_thread_sys_futex_wake
+_thread_sys_futex_requeue
_thread_sys_futimens
_thread_sys_futimes
_thread_sys_getdents
@@ -282,7 +285,10 @@ fstatat
fstatfs
fsync
ftruncate
-futex
+ofutex
+futex_wait
+futex_wake
+futex_requeue
futimens
futimes
getdents
@@ -1685,6 +1691,7 @@ _spinunlock
_thread_atfork
_thread_dofork
_thread_set_callbacks
+futex
pthread_atfork
pthread_cond_broadcast
pthread_cond_destroy
Index: shlib_version
===================================================================
RCS file: /cvs/src/lib/libc/shlib_version,v
retrieving revision 1.209
diff -u -p -u -p -r1.209 shlib_version
--- shlib_version 23 Oct 2019 19:33:41 -0000 1.209
+++ shlib_version 4 Apr 2020 14:49:55 -0000
@@ -1,4 +1,4 @@
major=96
-minor=0
+minor=1
# note: If changes were made to include/thread_private.h or if system calls
# were added/changed then librthread/shlib_version must also be updated.
Index: gen/sigwait.c
===================================================================
RCS file: /cvs/src/lib/libc/gen/sigwait.c,v
retrieving revision 1.1
diff -u -p -u -p -r1.1 sigwait.c
--- gen/sigwait.c 12 Jan 2019 00:16:03 -0000 1.1
+++ gen/sigwait.c 4 Apr 2020 14:49:55 -0000
@@ -58,6 +58,7 @@ sigwaitinfo(const sigset_t *set, siginfo
LEAVE_CANCEL_POINT(ret == -1);
return (ret);
}
+#endif
int
sigtimedwait(const sigset_t *set, siginfo_t *info,
@@ -72,4 +73,3 @@ sigtimedwait(const sigset_t *set, siginf
LEAVE_CANCEL_POINT(ret == -1);
return (ret);
}
-#endif
Index: hidden/sys/futex.h
===================================================================
RCS file: /cvs/src/lib/libc/hidden/sys/futex.h,v
retrieving revision 1.1
diff -u -p -u -p -r1.1 futex.h
--- hidden/sys/futex.h 15 Aug 2017 07:02:35 -0000 1.1
+++ hidden/sys/futex.h 4 Apr 2020 14:49:55 -0000
@@ -20,6 +20,8 @@
#include_next <sys/futex.h>
-PROTO_NORMAL(futex);
+PROTO_NORMAL(futex_wait);
+PROTO_NORMAL(futex_wake);
+PROTO_NORMAL(futex_requeue);
#endif /* !_LIBC_SYS_FUTEX_H_ */
Index: sys/Makefile.inc
===================================================================
RCS file: /cvs/src/lib/libc/sys/Makefile.inc,v
retrieving revision 1.158
diff -u -p -u -p -r1.158 Makefile.inc
--- sys/Makefile.inc 27 Nov 2019 19:45:24 -0000 1.158
+++ sys/Makefile.inc 4 Apr 2020 14:49:55 -0000
@@ -87,7 +87,7 @@ DASM= ${ASM:.o=.do}
# syscalls that CANNOT FAIL. They can return whatever value they want,
# they just never want to set errno.
ASM_NOERR=__get_tcb.o __set_tcb.o __threxit.o __thrsleep.o __thrwakeup.o \
- futex.o \
+ ofutex.o futex_wait.o futex_wake.o futex_requeue.o \
getdtablecount.o getegid.o geteuid.o getgid.o getlogin_r.o \
getpgrp.o getpid.o getppid.o getrtable.o getthrid.o getuid.o \
issetugid.o \
Index: thread/rthread_mutex.c
===================================================================
RCS file: /cvs/src/lib/libc/thread/rthread_mutex.c,v
retrieving revision 1.5
diff -u -p -u -p -r1.5 rthread_mutex.c
--- thread/rthread_mutex.c 13 Feb 2019 13:09:32 -0000 1.5
+++ thread/rthread_mutex.c 4 Apr 2020 14:49:55 -0000
@@ -284,3 +284,10 @@ pthread_mutex_unlock(pthread_mutex_t *mu
return (0);
}
DEF_STRONG(pthread_mutex_unlock);
+
+int
+futex(volatile uint32_t *f, int op, int val, const struct timespec *timeout,
uint32_t *g)
+{ /* Five-argument futex() entry point; forwards every argument unchanged. */
+ return _futex(f, op, val, timeout, g); /* presumably the inline _futex() from thread/synch.h -- confirm the include */
+}
+DEF_STRONG(futex);
Index: thread/synch.h
===================================================================
RCS file: /cvs/src/lib/libc/thread/synch.h,v
retrieving revision 1.4
diff -u -p -u -p -r1.4 synch.h
--- thread/synch.h 21 Oct 2019 10:04:19 -0000 1.4
+++ thread/synch.h 4 Apr 2020 14:49:55 -0000
@@ -20,9 +20,32 @@
#include <sys/futex.h>
static inline int
+_futex(volatile uint32_t *p, int op, int val, const struct timespec *timeout,
volatile uint32_t *g)
+{ /* Dispatch a futex()-style op to futex_wait/futex_wake/futex_requeue. */
+ int flags = 0;
+
+ if (op & FUTEX_PRIVATE_FLAG)
+ flags |= FT_PRIVATE; /* private bit of op becomes a flags bit */
+
+ switch (op) { /* casts below: the split prototypes take unqualified uint32_t * */
+ case FUTEX_WAIT:
+ case FUTEX_WAIT_PRIVATE:
+ return futex_wait((uint32_t *)p, val, timeout, flags);
+ case FUTEX_WAKE:
+ case FUTEX_WAKE_PRIVATE:
+ return futex_wake((uint32_t *)p, val, flags);
+ case FUTEX_REQUEUE:
+ case FUTEX_REQUEUE_PRIVATE:
+ return futex_requeue((uint32_t *)p, val, (uint32_t *)g, timeout, flags); /* timeout forwarded as-is; _requeue() carries its count m in it */
+ }
+
+ return ENOSYS; /* op matched none of the cases above */
+}
+
+static inline int
_wake(volatile uint32_t *p, int n)
{ /* Issue FUTEX_WAKE_PRIVATE on p with val = n; no timeout, no second address. */
- return futex(p, FUTEX_WAKE_PRIVATE, n, NULL, NULL);
+ return _futex(p, FUTEX_WAKE_PRIVATE, n, NULL, NULL);
}
static inline int
@@ -31,7 +54,7 @@ _twait(volatile uint32_t *p, int val, cl
struct timespec rel;
if (abs == NULL)
- return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
+ return _futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
return (EINVAL);
@@ -44,11 +67,11 @@ _twait(volatile uint32_t *p, int val, cl
if (rel.tv_sec < 0)
return (ETIMEDOUT);
- return futex(p, FUTEX_WAIT_PRIVATE, val, &rel, NULL);
+ return _futex(p, FUTEX_WAIT_PRIVATE, val, &rel, NULL);
}
static inline int
_requeue(volatile uint32_t *p, int n, int m, volatile uint32_t *q)
{ /* Issue FUTEX_REQUEUE_PRIVATE on p with val = n and second address q. */
- return futex(p, FUTEX_REQUEUE_PRIVATE, n, (void *)(long)m, q);
+ return _futex(p, FUTEX_REQUEUE_PRIVATE, n, (void *)(long)m, q); /* m is passed as an integer cast into the timeout pointer argument */
}