this narrows the scope of the KERNEL_LOCK in kbind(2) so the syscall
argument checks can be done without the kernel lock.
care is taken to allow the pc/cookie checks to run without any lock by
being careful with the order of the checks. all modifications to the
pc/cookie state are serialised by the per process mutex.
i dont know enough about uvm to say whether it is safe to unlock the
actual memory updates too, but even if i was confident i would still
prefer to change it as a separate step.
ok?
Index: kern/init_sysent.c
===================================================================
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.236
diff -u -p -r1.236 init_sysent.c
--- kern/init_sysent.c 1 May 2022 23:00:04 -0000 1.236
+++ kern/init_sysent.c 17 May 2022 00:36:03 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_sysent.c,v 1.236 2022/05/01 23:00:04 tedu Exp $ */
+/* $OpenBSD$ */
/*
* System call switch table.
@@ -204,7 +204,7 @@ const struct sysent sysent[] = {
sys_utimensat }, /* 84 = utimensat */
{ 2, s(struct sys_futimens_args), 0,
sys_futimens }, /* 85 = futimens */
- { 3, s(struct sys_kbind_args), 0,
+ { 3, s(struct sys_kbind_args), SY_NOLOCK | 0,
sys_kbind }, /* 86 = kbind */
{ 2, s(struct sys_clock_gettime_args), SY_NOLOCK | 0,
sys_clock_gettime }, /* 87 = clock_gettime */
Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.223
diff -u -p -r1.223 syscalls.master
--- kern/syscalls.master 24 Feb 2022 07:41:51 -0000 1.223
+++ kern/syscalls.master 17 May 2022 00:36:03 -0000
@@ -194,7 +194,7 @@
const struct timespec *times, int flag); }
85 STD { int sys_futimens(int fd, \
const struct timespec *times); }
-86 STD { int sys_kbind(const struct __kbind *param, \
+86 STD NOLOCK { int sys_kbind(const struct __kbind *param, \
size_t psize, int64_t proc_cookie); }
87 STD NOLOCK { int sys_clock_gettime(clockid_t clock_id, \
struct timespec *tp); }
Index: uvm/uvm_mmap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_mmap.c,v
retrieving revision 1.169
diff -u -p -r1.169 uvm_mmap.c
--- uvm/uvm_mmap.c 19 Jan 2022 10:43:48 -0000 1.169
+++ uvm/uvm_mmap.c 17 May 2022 00:36:03 -0000
@@ -70,6 +70,7 @@
#include <sys/pledge.h>
#include <sys/unistd.h> /* for KBIND* */
#include <sys/user.h>
+#include <sys/atomic.h>
#include <machine/exec.h> /* for __LDPGSZ */
@@ -1125,33 +1126,64 @@ sys_kbind(struct proc *p, void *v, regis
const char *data;
vaddr_t baseva, last_baseva, endva, pageoffset, kva;
size_t psize, s;
- u_long pc;
+ u_long pc, kpc;
int count, i, extra;
+ uint64_t cookie;
int error;
/*
* extract syscall args from uap
*/
paramp = SCARG(uap, param);
- psize = SCARG(uap, psize);
/* a NULL paramp disables the syscall for the process */
if (paramp == NULL) {
+ mtx_enter(&pr->ps_mtx);
if (pr->ps_kbind_addr != 0)
- sigexit(p, SIGILL);
+ goto leave_sigill;
pr->ps_kbind_addr = BOGO_PC;
+ mtx_leave(&pr->ps_mtx);
return 0;
}
/* security checks */
+
+ /*
+ * ps_kbind_addr can only be set to 0 or BOGO_PC by the
+ * kernel, not by a call from userland.
+ */
pc = PROC_PC(p);
- if (pr->ps_kbind_addr == 0) {
- pr->ps_kbind_addr = pc;
- pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
- } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC)
- sigexit(p, SIGILL);
- else if (pr->ps_kbind_cookie != SCARG(uap, proc_cookie))
- sigexit(p, SIGILL);
+ if (pc == 0 || pc == BOGO_PC)
+ goto sigill;
+
+ cookie = SCARG(uap, proc_cookie);
+ if (pr->ps_kbind_addr == pc) {
+ membar_datadep_consumer();
+ if (pr->ps_kbind_cookie != cookie)
+ goto sigill;
+ } else {
+ mtx_enter(&pr->ps_mtx);
+ kpc = pr->ps_kbind_addr;
+
+ /*
+ * If we're the first thread in (kpc is 0), then
+ * remember where the call came from for the future.
+ *
+ * Maybe another thread raced us and won. pc cannot
+ * be BOGO_PC, so this also handles the check for
+ * the disabled case.
+ */
+
+ if (kpc == 0) {
+ pr->ps_kbind_cookie = cookie;
+ membar_producer();
+ pr->ps_kbind_addr = pc;
+ } else if (kpc != pc || pr->ps_kbind_cookie != cookie)
+ goto leave_sigill;
+ mtx_leave(&pr->ps_mtx);
+ }
+
+ psize = SCARG(uap, psize);
if (psize < sizeof(struct __kbind) || psize > sizeof(param))
return EINVAL;
if ((error = copyin(paramp, ¶m, psize)))
@@ -1187,6 +1219,7 @@ sys_kbind(struct proc *p, void *v, regis
data = (const char *)¶mp[count];
/* all looks good, so do the bindings */
+ KERNEL_LOCK();
last_baseva = VM_MAXUSER_ADDRESS;
kva = 0;
TAILQ_INIT(&dead_entries);
@@ -1239,6 +1272,14 @@ redo:
vm_map_unlock(kernel_map);
}
uvm_unmap_detach(&dead_entries, AMAP_REFALL);
+ KERNEL_UNLOCK();
return error;
+
+leave_sigill:
+ mtx_leave(&pr->ps_mtx);
+sigill:
+ KERNEL_LOCK();
+ sigexit(p, SIGILL);
+ /* NOTREACHED KERNEL_UNLOCK(); */
}
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.329
diff -u -p -r1.329 proc.h
--- sys/proc.h 10 Mar 2022 15:21:08 -0000 1.329
+++ sys/proc.h 17 May 2022 00:36:03 -0000
@@ -234,8 +234,8 @@ struct process {
uint64_t ps_pledge;
uint64_t ps_execpledge;
- int64_t ps_kbind_cookie;
- u_long ps_kbind_addr;
+ int64_t ps_kbind_cookie; /* [m] */
+ u_long ps_kbind_addr; /* [m] */
/* End area that is copied on creation. */
#define ps_endcopy ps_refcnt