On Tue, Jun 28 2022, Martin Pieuchot <[email protected]> wrote:
> On 28/06/22(Tue) 18:17, Jeremie Courreges-Anglas wrote:
>>
>> Initially I just wandered in syscall_mi.h and found the locking scheme
>> super weird, even if technically correct. pledge_syscall() better be
>> safe to call without the kernel lock so I don't understand why we're
>> sometimes calling it with the kernel lock and sometimes not.
>>
>> ps_pledge is 64 bits so it's not possible to unset bits in an atomic
>> manner on all architectures. Even if we're only removing bits and there
>> is probably no way to see a completely garbage value, it makes sense to
>> just protect ps_pledge (and ps_execpledge) in the usual manner so that
>> we can unlock the syscall. The diff below protects the fields using
>> ps_mtx even though I initially used a dedicated ps_pledge_mtx.
>> unveil_destroy() needs to be moved after the critical section.
>> regress/sys/kern/pledge looks happy with this. The sys/syscall_mi.h
>> change can be committed in a separate step.
>>
>> Input and oks welcome.
>
> This looks nice. I doubt there's any existing program where you can
> really test this. Even firefox and chromium should do things
> correctly.
>
> Maybe you should write a regress test that tries to break the kernel.
That would need some pondering, I'm not sure what kind of breakage
you're envisioning. The obvious check to perform would be to ensure
that we're not increasing the promises but the code already does it.
Hmm.
For now, here's the latest diff based on input from the hackroom, which
basically amounts to:
- don't take the mutex to read the value. Even on 32 bits machines that
can't write an 8 byte value in a single go we don't expect garbage to
on a read crossing a write, since we only remove bits and never add
some.
- use READ_ONCE when we're storing the value in a local variable, to
prevent possible reloading from memory. In the current code it would
not be a problem but ensuring that we do not reload from memory is
just good practice for stuff that can be modified concurrently.
(This part can be committed in a second step, just like the
syscall_mi.h change.)
ok?
Index: kern/init_sysent.c
===================================================================
RCS file: /home/cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.238
diff -u -p -r1.238 init_sysent.c
--- kern/init_sysent.c 27 Jun 2022 14:26:05 -0000 1.238
+++ kern/init_sysent.c 28 Jun 2022 15:18:25 -0000
@@ -1,10 +1,10 @@
-/* $OpenBSD: init_sysent.c,v 1.238 2022/06/27 14:26:05 cheloha Exp $
*/
+/* $OpenBSD$ */
/*
* System call switch table.
*
* DO NOT EDIT-- this file is automatically generated.
- * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
mvs Exp
+ * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
cheloha Exp
*/
#include <sys/param.h>
@@ -248,7 +248,7 @@ const struct sysent sysent[] = {
sys_listen }, /* 106 = listen */
{ 4, s(struct sys_chflagsat_args), 0,
sys_chflagsat }, /* 107 = chflagsat */
- { 2, s(struct sys_pledge_args), 0,
+ { 2, s(struct sys_pledge_args), SY_NOLOCK | 0,
sys_pledge }, /* 108 = pledge */
{ 4, s(struct sys_ppoll_args), 0,
sys_ppoll }, /* 109 = ppoll */
Index: kern/kern_pledge.c
===================================================================
RCS file: /home/cvs/src/sys/kern/kern_pledge.c,v
retrieving revision 1.282
diff -u -p -r1.282 kern_pledge.c
--- kern/kern_pledge.c 26 Jun 2022 06:11:49 -0000 1.282
+++ kern/kern_pledge.c 28 Jun 2022 17:00:51 -0000
@@ -21,6 +21,7 @@
#include <sys/mount.h>
#include <sys/proc.h>
+#include <sys/mutex.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -465,13 +466,26 @@ sys_pledge(struct proc *p, void *v, regi
struct process *pr = p->p_p;
uint64_t promises, execpromises;
int error;
+ int unveil_cleanup = 0;
+ /* Check for any error in user input */
if (SCARG(uap, promises)) {
error = parsepledges(p, "pledgereq",
SCARG(uap, promises), &promises);
if (error)
return (error);
+ }
+ if (SCARG(uap, execpromises)) {
+ error = parsepledges(p, "pledgeexecreq",
+ SCARG(uap, execpromises), &execpromises);
+ if (error)
+ return (error);
+ }
+
+ mtx_enter(&pr->ps_mtx);
+ /* Check for any error wrt current promises */
+ if (SCARG(uap, promises)) {
/* In "error" mode, ignore promise increase requests,
* but accept promise decrease requests */
if (ISSET(pr->ps_flags, PS_PLEDGE) &&
@@ -480,37 +494,47 @@ sys_pledge(struct proc *p, void *v, regi
/* Only permit reductions */
if (ISSET(pr->ps_flags, PS_PLEDGE) &&
- (((promises | pr->ps_pledge) != pr->ps_pledge)))
+ (((promises | pr->ps_pledge) != pr->ps_pledge))) {
+ mtx_leave(&pr->ps_mtx);
return (EPERM);
+ }
}
if (SCARG(uap, execpromises)) {
- error = parsepledges(p, "pledgeexecreq",
- SCARG(uap, execpromises), &execpromises);
- if (error)
- return (error);
-
/* Only permit reductions */
if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
- (((execpromises | pr->ps_execpledge) != pr->ps_execpledge)))
+ (((execpromises | pr->ps_execpledge) !=
pr->ps_execpledge))) {
+ mtx_leave(&pr->ps_mtx);
return (EPERM);
+ }
}
+ /* Set up promises */
if (SCARG(uap, promises)) {
pr->ps_pledge = promises;
atomic_setbits_int(&pr->ps_flags, PS_PLEDGE);
- /*
- * Kill off unveil and drop unveil vnode refs if we no
- * longer are holding any path-accessing pledge
- */
+
if ((pr->ps_pledge & (PLEDGE_RPATH | PLEDGE_WPATH |
PLEDGE_CPATH | PLEDGE_DPATH | PLEDGE_TMPPATH | PLEDGE_EXEC |
PLEDGE_UNIX | PLEDGE_UNVEIL)) == 0)
- unveil_destroy(pr);
+ unveil_cleanup = 1;
}
if (SCARG(uap, execpromises)) {
pr->ps_execpledge = execpromises;
atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
}
+
+ mtx_leave(&pr->ps_mtx);
+
+ if (unveil_cleanup) {
+ /*
+ * Kill off unveil and drop unveil vnode refs if we no
+ * longer are holding any path-accessing pledge
+ */
+ KERNEL_LOCK();
+ unveil_destroy(pr);
+ KERNEL_UNLOCK();
+ }
+
return (0);
}
@@ -583,7 +607,7 @@ pledge_namei(struct proc *p, struct name
if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
(p->p_p->ps_flags & PS_COREDUMP))
return (0);
- pledge = p->p_p->ps_pledge;
+ pledge = READ_ONCE(p->p_p->ps_pledge);
if (ni->ni_pledge == 0)
panic("pledge_namei: ni_pledge");
@@ -704,11 +728,12 @@ pledge_namei(struct proc *p, struct name
* XXX
* The current hack for YP support in "getpw"
* is to enable some "inet" features until
- * next pledge call. Setting a bit in ps_pledge
- * is not safe with respect to multiple threads,
- * a very different approach is needed.
+ * next pledge call.
*/
+ mtx_enter(&p->p_p->ps_mtx);
p->p_p->ps_pledge |= PLEDGE_YPACTIVE;
+ mtx_leave(&p->p_p->ps_mtx);
+ pledge |= PLEDGE_YPACTIVE;
ni->ni_cnd.cn_flags |= BYPASSUNVEIL;
return (0);
}
@@ -826,7 +851,7 @@ pledge_sysctl(struct proc *p, int miblen
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- pledge = p->p_p->ps_pledge;
+ pledge = READ_ONCE(p->p_p->ps_pledge);
if (new)
return pledge_fail(p, EFAULT, 0);
@@ -1070,7 +1095,7 @@ pledge_ioctl(struct proc *p, long com, s
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- pledge = p->p_p->ps_pledge;
+ pledge = READ_ONCE(p->p_p->ps_pledge);
/*
* The ioctl's which are always allowed.
@@ -1365,7 +1390,7 @@ pledge_sockopt(struct proc *p, int set,
if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
return (0);
- pledge = p->p_p->ps_pledge;
+ pledge = READ_ONCE(p->p_p->ps_pledge);
/* Always allow these, which are too common to reject */
switch (level) {
@@ -1515,7 +1540,7 @@ pledge_socket(struct proc *p, int domain
if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
return 0;
- pledge = p->p_p->ps_pledge;
+ pledge = READ_ONCE(p->p_p->ps_pledge);
if (ISSET(state, SS_DNS)) {
if (ISSET(pledge, PLEDGE_DNS))
Index: kern/syscalls.c
===================================================================
RCS file: /home/cvs/src/sys/kern/syscalls.c,v
retrieving revision 1.236
diff -u -p -r1.236 syscalls.c
--- kern/syscalls.c 16 May 2022 07:38:10 -0000 1.236
+++ kern/syscalls.c 28 Jun 2022 15:18:25 -0000
@@ -1,10 +1,10 @@
-/* $OpenBSD: syscalls.c,v 1.236 2022/05/16 07:38:10 mvs Exp $ */
+/* $OpenBSD$ */
/*
* System call names.
*
* DO NOT EDIT-- this file is automatically generated.
- * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
mvs Exp
+ * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
cheloha Exp
*/
const char *const syscallnames[] = {
Index: kern/syscalls.master
===================================================================
RCS file: /home/cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.225
diff -u -p -r1.225 syscalls.master
--- kern/syscalls.master 27 Jun 2022 14:26:05 -0000 1.225
+++ kern/syscalls.master 28 Jun 2022 12:47:19 -0000
@@ -228,7 +228,7 @@
106 STD { int sys_listen(int s, int backlog); }
107 STD { int sys_chflagsat(int fd, const char *path, \
u_int flags, int atflags); }
-108 STD { int sys_pledge(const char *promises, \
+108 STD NOLOCK { int sys_pledge(const char *promises, \
const char *execpromises); }
109 STD { int sys_ppoll(struct pollfd *fds, \
u_int nfds, const struct timespec *ts, \
Index: sys/proc.h
===================================================================
RCS file: /home/cvs/src/sys/sys/proc.h,v
retrieving revision 1.331
diff -u -p -r1.331 proc.h
--- sys/proc.h 27 Jun 2022 14:26:05 -0000 1.331
+++ sys/proc.h 28 Jun 2022 16:55:24 -0000
@@ -231,8 +231,8 @@ struct process {
u_int32_t ps_acflag; /* Accounting flags. */
- uint64_t ps_pledge;
- uint64_t ps_execpledge;
+ uint64_t ps_pledge; /* [m] pledge promises */
+ uint64_t ps_execpledge; /* [m] execpledge promises */
int64_t ps_kbind_cookie; /* [m] */
u_long ps_kbind_addr; /* [m] */
Index: sys/syscall.h
===================================================================
RCS file: /home/cvs/src/sys/sys/syscall.h,v
retrieving revision 1.235
diff -u -p -r1.235 syscall.h
--- sys/syscall.h 27 Jun 2022 14:26:06 -0000 1.235
+++ sys/syscall.h 28 Jun 2022 15:18:25 -0000
@@ -1,10 +1,10 @@
-/* $OpenBSD: syscall.h,v 1.235 2022/06/27 14:26:06 cheloha Exp $ */
+/* $OpenBSD$ */
/*
* System call numbers.
*
* DO NOT EDIT-- this file is automatically generated.
- * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
mvs Exp
+ * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
cheloha Exp
*/
/* syscall: "syscall" ret: "int" args: "int" "..." */
Index: sys/syscall_mi.h
===================================================================
RCS file: /home/cvs/src/sys/sys/syscall_mi.h,v
retrieving revision 1.25
diff -u -p -r1.25 syscall_mi.h
--- sys/syscall_mi.h 21 Jan 2020 16:16:23 -0000 1.25
+++ sys/syscall_mi.h 28 Jun 2022 12:43:07 -0000
@@ -89,16 +89,15 @@ mi_syscall(struct proc *p, register_t co
uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial))
return (EPERM);
- if (lock)
- KERNEL_LOCK();
pledged = (p->p_p->ps_flags & PS_PLEDGE);
if (pledged && (error = pledge_syscall(p, code, &tval))) {
- if (!lock)
- KERNEL_LOCK();
+ KERNEL_LOCK();
error = pledge_fail(p, error, tval);
KERNEL_UNLOCK();
return (error);
}
+ if (lock)
+ KERNEL_LOCK();
error = (*callp->sy_call)(p, argp, retval);
if (lock)
KERNEL_UNLOCK();
Index: sys/syscallargs.h
===================================================================
RCS file: /home/cvs/src/sys/sys/syscallargs.h,v
retrieving revision 1.238
diff -u -p -r1.238 syscallargs.h
--- sys/syscallargs.h 27 Jun 2022 14:26:06 -0000 1.238
+++ sys/syscallargs.h 28 Jun 2022 15:18:25 -0000
@@ -1,10 +1,10 @@
-/* $OpenBSD: syscallargs.h,v 1.238 2022/06/27 14:26:06 cheloha Exp $
*/
+/* $OpenBSD$ */
/*
* System call argument lists.
*
* DO NOT EDIT-- this file is automatically generated.
- * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
mvs Exp
+ * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
cheloha Exp
*/
#ifdef syscallarg
--
jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF DDCC 0DFA 74AE 1524 E7EE