On 28/06/22(Tue) 18:17, Jeremie Courreges-Anglas wrote:
>
> Initially I just wandered in syscall_mi.h and found the locking scheme
> super weird, even if technically correct. pledge_syscall() better be
> safe to call without the kernel lock so I don't understand why we're
> sometimes calling it with the kernel lock and sometimes not.
>
> ps_pledge is 64 bits so it's not possible to unset bits in an atomic
> manner on all architectures. Even if we're only removing bits and there
> is probably no way to see a completely garbage value, it makes sense to
> just protect ps_pledge (and ps_execpledge) in the usual manner so that
> we can unlock the syscall. The diff below protects the fields using
> ps_mtx even though I initially used a dedicated ps_pledge_mtx.
> unveil_destroy() needs to be moved after the critical section.
> regress/sys/kern/pledge looks happy with this. The sys/syscall_mi.h
> change can be committed in a separate step.
>
> Input and oks welcome.
This looks nice. I doubt there's any existing program where you can
really test this. Even firefox and chromium should do things
correctly.
Maybe you should write a regress test that tries to break the kernel.
> Index: arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /home/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.315
> diff -u -p -r1.315 vmm.c
> --- arch/amd64/amd64/vmm.c 27 Jun 2022 15:12:14 -0000 1.315
> +++ arch/amd64/amd64/vmm.c 28 Jun 2022 13:54:25 -0000
> @@ -713,7 +713,7 @@ pledge_ioctl_vmm(struct proc *p, long co
> case VMM_IOC_CREATE:
> case VMM_IOC_INFO:
> /* The "parent" process in vmd forks and manages VMs */
> - if (p->p_p->ps_pledge & PLEDGE_PROC)
> + if (pledge_get(p->p_p) & PLEDGE_PROC)
> return (0);
> break;
> case VMM_IOC_TERM:
> @@ -1312,7 +1312,7 @@ vm_find(uint32_t id, struct vm **res)
> * The managing vmm parent process can lookup all
> * all VMs and is indicated by PLEDGE_PROC.
> */
> - if (((p->p_p->ps_pledge &
> + if (((pledge_get(p->p_p) &
> (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
> (vm->vm_creator_pid != p->p_p->ps_pid))
> return (pledge_fail(p, EPERM, PLEDGE_VMM));
> Index: kern/init_sysent.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 init_sysent.c
> --- kern/init_sysent.c 27 Jun 2022 14:26:05 -0000 1.238
> +++ kern/init_sysent.c 28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/* $OpenBSD: init_sysent.c,v 1.238 2022/06/27 14:26:05 cheloha Exp $
> */
> +/* $OpenBSD$ */
>
> /*
> * System call switch table.
> *
> * DO NOT EDIT-- this file is automatically generated.
> - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
> mvs Exp
> + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
> cheloha Exp
> */
>
> #include <sys/param.h>
> @@ -248,7 +248,7 @@ const struct sysent sysent[] = {
> sys_listen }, /* 106 = listen */
> { 4, s(struct sys_chflagsat_args), 0,
> sys_chflagsat }, /* 107 = chflagsat */
> - { 2, s(struct sys_pledge_args), 0,
> + { 2, s(struct sys_pledge_args), SY_NOLOCK | 0,
> sys_pledge }, /* 108 = pledge */
> { 4, s(struct sys_ppoll_args), 0,
> sys_ppoll }, /* 109 = ppoll */
> Index: kern/kern_event.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_event.c,v
> retrieving revision 1.191
> diff -u -p -r1.191 kern_event.c
> --- kern/kern_event.c 27 Jun 2022 13:35:21 -0000 1.191
> +++ kern/kern_event.c 28 Jun 2022 13:55:18 -0000
> @@ -331,7 +331,7 @@ filt_procattach(struct knote *kn)
> int s;
>
> if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
> - (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
> + (pledge_get(curproc->p_p) & PLEDGE_PROC) == 0)
> return pledge_fail(curproc, EPERM, PLEDGE_PROC);
>
> if (kn->kn_id > PID_MAX)
> Index: kern/kern_pledge.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.282
> diff -u -p -r1.282 kern_pledge.c
> --- kern/kern_pledge.c 26 Jun 2022 06:11:49 -0000 1.282
> +++ kern/kern_pledge.c 28 Jun 2022 15:21:46 -0000
> @@ -21,6 +21,7 @@
>
> #include <sys/mount.h>
> #include <sys/proc.h>
> +#include <sys/mutex.h>
> #include <sys/fcntl.h>
> #include <sys/file.h>
> #include <sys/filedesc.h>
> @@ -465,13 +466,26 @@ sys_pledge(struct proc *p, void *v, regi
> struct process *pr = p->p_p;
> uint64_t promises, execpromises;
> int error;
> + int unveil_cleanup = 0;
>
> + /* Check for any error in user input */
> if (SCARG(uap, promises)) {
> error = parsepledges(p, "pledgereq",
> SCARG(uap, promises), &promises);
> if (error)
> return (error);
> + }
> + if (SCARG(uap, execpromises)) {
> + error = parsepledges(p, "pledgeexecreq",
> + SCARG(uap, execpromises), &execpromises);
> + if (error)
> + return (error);
> + }
> +
> + mtx_enter(&pr->ps_mtx);
>
> + /* Check for any error wrt current promises */
> + if (SCARG(uap, promises)) {
> /* In "error" mode, ignore promise increase requests,
> * but accept promise decrease requests */
> if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> @@ -480,43 +494,55 @@ sys_pledge(struct proc *p, void *v, regi
>
> /* Only permit reductions */
> if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> - (((promises | pr->ps_pledge) != pr->ps_pledge)))
> + (((promises | pr->ps_pledge) != pr->ps_pledge))) {
> + mtx_leave(&pr->ps_mtx);
> return (EPERM);
> + }
> }
> if (SCARG(uap, execpromises)) {
> - error = parsepledges(p, "pledgeexecreq",
> - SCARG(uap, execpromises), &execpromises);
> - if (error)
> - return (error);
> -
> /* Only permit reductions */
> if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
> - (((execpromises | pr->ps_execpledge) != pr->ps_execpledge)))
> + (((execpromises | pr->ps_execpledge) !=
> pr->ps_execpledge))) {
> + mtx_leave(&pr->ps_mtx);
> return (EPERM);
> + }
> }
>
> + /* Set up promises */
> if (SCARG(uap, promises)) {
> pr->ps_pledge = promises;
> atomic_setbits_int(&pr->ps_flags, PS_PLEDGE);
> - /*
> - * Kill off unveil and drop unveil vnode refs if we no
> - * longer are holding any path-accessing pledge
> - */
> +
> if ((pr->ps_pledge & (PLEDGE_RPATH | PLEDGE_WPATH |
> PLEDGE_CPATH | PLEDGE_DPATH | PLEDGE_TMPPATH | PLEDGE_EXEC |
> PLEDGE_UNIX | PLEDGE_UNVEIL)) == 0)
> - unveil_destroy(pr);
> + unveil_cleanup = 1;
> }
> if (SCARG(uap, execpromises)) {
> pr->ps_execpledge = execpromises;
> atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
> }
> +
> + mtx_leave(&pr->ps_mtx);
> +
> + if (unveil_cleanup) {
> + /*
> + * Kill off unveil and drop unveil vnode refs if we no
> + * longer are holding any path-accessing pledge
> + */
> + KERNEL_LOCK();
> + unveil_destroy(pr);
> + KERNEL_UNLOCK();
> + }
> +
> return (0);
> }
>
> int
> pledge_syscall(struct proc *p, int code, uint64_t *tval)
> {
> + uint64_t pledge;
> +
> p->p_pledge_syscall = code;
> *tval = 0;
>
> @@ -526,7 +552,8 @@ pledge_syscall(struct proc *p, int code,
> if (pledge_syscalls[code] == PLEDGE_ALWAYS)
> return (0);
>
> - if (p->p_p->ps_pledge & pledge_syscalls[code])
> + pledge = pledge_get(p->p_p);;
> + if (pledge & pledge_syscalls[code])
> return (0);
>
> *tval = pledge_syscalls[code];
> @@ -549,7 +576,7 @@ pledge_fail(struct proc *p, int error, u
> if (KTRPOINT(p, KTR_PLEDGE))
> ktrpledge(p, error, code, p->p_pledge_syscall);
> #endif
> - if (p->p_p->ps_pledge & PLEDGE_ERROR)
> + if (pledge_get(p->p_p) & PLEDGE_ERROR)
> return (ENOSYS);
>
> KERNEL_LOCK();
> @@ -583,7 +610,7 @@ pledge_namei(struct proc *p, struct name
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
> (p->p_p->ps_flags & PS_COREDUMP))
> return (0);
> - pledge = p->p_p->ps_pledge;
> + pledge = pledge_get(p->p_p);
>
> if (ni->ni_pledge == 0)
> panic("pledge_namei: ni_pledge");
> @@ -704,11 +731,12 @@ pledge_namei(struct proc *p, struct name
> * XXX
> * The current hack for YP support in "getpw"
> * is to enable some "inet" features until
> - * next pledge call. Setting a bit in ps_pledge
> - * is not safe with respect to multiple threads,
> - * a very different approach is needed.
> + * next pledge call.
> */
> + mtx_enter(&p->p_p->ps_mtx);
> p->p_p->ps_pledge |= PLEDGE_YPACTIVE;
> + mtx_leave(&p->p_p->ps_mtx);
> + pledge |= PLEDGE_YPACTIVE;
> ni->ni_cnd.cn_flags |= BYPASSUNVEIL;
> return (0);
> }
> @@ -770,7 +798,7 @@ pledge_recvfd(struct proc *p, struct fil
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
> + if ((pledge_get(p->p_p) & PLEDGE_RECVFD) == 0)
> return pledge_fail(p, EPERM, PLEDGE_RECVFD);
>
> switch (fp->f_type) {
> @@ -798,7 +826,7 @@ pledge_sendfd(struct proc *p, struct fil
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
> + if ((pledge_get(p->p_p) & PLEDGE_SENDFD) == 0)
> return pledge_fail(p, EPERM, PLEDGE_SENDFD);
>
> switch (fp->f_type) {
> @@ -826,7 +854,7 @@ pledge_sysctl(struct proc *p, int miblen
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - pledge = p->p_p->ps_pledge;
> + pledge = pledge_get(p->p_p);
>
> if (new)
> return pledge_fail(p, EFAULT, 0);
> @@ -1023,7 +1051,7 @@ pledge_chown(struct proc *p, uid_t uid,
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
> + if (pledge_get(p->p_p) & PLEDGE_CHOWNUID)
> return (0);
>
> if (uid != -1 && uid != p->p_ucred->cr_uid)
> @@ -1041,7 +1069,7 @@ pledge_adjtime(struct proc *p, const voi
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
> + if ((pledge_get(p->p_p) & PLEDGE_SETTIME))
> return (0);
> if (delta)
> return (EPERM);
> @@ -1054,7 +1082,7 @@ pledge_sendit(struct proc *p, const void
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS |
> PLEDGE_YPACTIVE)))
> + if ((pledge_get(p->p_p) & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS |
> PLEDGE_YPACTIVE)))
> return (0); /* may use address */
> if (to == NULL)
> return (0); /* behaves just like write */
> @@ -1070,7 +1098,7 @@ pledge_ioctl(struct proc *p, long com, s
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - pledge = p->p_p->ps_pledge;
> + pledge = pledge_get(p->p_p);
>
> /*
> * The ioctl's which are always allowed.
> @@ -1365,7 +1393,7 @@ pledge_sockopt(struct proc *p, int set,
>
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - pledge = p->p_p->ps_pledge;
> + pledge = pledge_get(p->p_p);
>
> /* Always allow these, which are too common to reject */
> switch (level) {
> @@ -1515,7 +1543,7 @@ pledge_socket(struct proc *p, int domain
>
> if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
> return 0;
> - pledge = p->p_p->ps_pledge;
> + pledge = pledge_get(p->p_p);
>
> if (ISSET(state, SS_DNS)) {
> if (ISSET(pledge, PLEDGE_DNS))
> @@ -1548,7 +1576,7 @@ pledge_flock(struct proc *p)
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
>
> - if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
> + if ((pledge_get(p->p_p) & PLEDGE_FLOCK))
> return (0);
> return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
> }
> @@ -1585,7 +1613,7 @@ pledge_fcntl(struct proc *p, int cmd)
> {
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return (0);
> - if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
> + if ((pledge_get(p->p_p) & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
> return pledge_fail(p, EPERM, PLEDGE_PROC);
> return (0);
> }
> @@ -1595,7 +1623,7 @@ pledge_kill(struct proc *p, pid_t pid)
> {
> if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
> return 0;
> - if (p->p_p->ps_pledge & PLEDGE_PROC)
> + if (pledge_get(p->p_p) & PLEDGE_PROC)
> return 0;
> if (pid == 0 || pid == p->p_p->ps_pid)
> return 0;
> @@ -1610,7 +1638,7 @@ pledge_protexec(struct proc *p, int prot
> /* Before kbind(2) call, ld.so and crt may create EXEC mappings */
> if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
> return 0;
> - if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
> + if (!(pledge_get(p->p_p) & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
> return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
> return 0;
> }
> Index: kern/syscalls.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.236
> diff -u -p -r1.236 syscalls.c
> --- kern/syscalls.c 16 May 2022 07:38:10 -0000 1.236
> +++ kern/syscalls.c 28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/* $OpenBSD: syscalls.c,v 1.236 2022/05/16 07:38:10 mvs Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call names.
> *
> * DO NOT EDIT-- this file is automatically generated.
> - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
> mvs Exp
> + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
> cheloha Exp
> */
>
> const char *const syscallnames[] = {
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.225
> diff -u -p -r1.225 syscalls.master
> --- kern/syscalls.master 27 Jun 2022 14:26:05 -0000 1.225
> +++ kern/syscalls.master 28 Jun 2022 12:47:19 -0000
> @@ -228,7 +228,7 @@
> 106 STD { int sys_listen(int s, int backlog); }
> 107 STD { int sys_chflagsat(int fd, const char *path, \
> u_int flags, int atflags); }
> -108 STD { int sys_pledge(const char *promises, \
> +108 STD NOLOCK { int sys_pledge(const char *promises, \
> const char *execpromises); }
> 109 STD { int sys_ppoll(struct pollfd *fds, \
> u_int nfds, const struct timespec *ts, \
> Index: sys/proc.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/proc.h,v
> retrieving revision 1.331
> diff -u -p -r1.331 proc.h
> --- sys/proc.h 27 Jun 2022 14:26:05 -0000 1.331
> +++ sys/proc.h 28 Jun 2022 15:23:12 -0000
> @@ -231,8 +231,8 @@ struct process {
>
> u_int32_t ps_acflag; /* Accounting flags. */
>
> - uint64_t ps_pledge;
> - uint64_t ps_execpledge;
> + uint64_t ps_pledge; /* [m] pledge promises */
> + uint64_t ps_execpledge; /* [m] execpledge promises */
>
> int64_t ps_kbind_cookie; /* [m] */
> u_long ps_kbind_addr; /* [m] */
> @@ -566,6 +566,19 @@ refreshcreds(struct proc *p)
> if (pr->ps_ucred != p->p_ucred)
> dorefreshcreds(pr, p);
> }
> +
> +static inline uint64_t
> +pledge_get(struct process *pr)
> +{
> + uint64_t promises;
> +
> + mtx_enter(&pr->ps_mtx);
> + promises = pr->ps_pledge;
> + mtx_leave(&pr->ps_mtx);
> +
> + return promises;
> +}
> +
>
> enum single_thread_mode {
> SINGLE_SUSPEND, /* other threads to stop wherever they are */
> Index: sys/syscall.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.235
> diff -u -p -r1.235 syscall.h
> --- sys/syscall.h 27 Jun 2022 14:26:06 -0000 1.235
> +++ sys/syscall.h 28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/* $OpenBSD: syscall.h,v 1.235 2022/06/27 14:26:06 cheloha Exp $ */
> +/* $OpenBSD$ */
>
> /*
> * System call numbers.
> *
> * DO NOT EDIT-- this file is automatically generated.
> - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
> mvs Exp
> + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
> cheloha Exp
> */
>
> /* syscall: "syscall" ret: "int" args: "int" "..." */
> Index: sys/syscall_mi.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscall_mi.h,v
> retrieving revision 1.25
> diff -u -p -r1.25 syscall_mi.h
> --- sys/syscall_mi.h 21 Jan 2020 16:16:23 -0000 1.25
> +++ sys/syscall_mi.h 28 Jun 2022 12:43:07 -0000
> @@ -89,16 +89,15 @@ mi_syscall(struct proc *p, register_t co
> uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial))
> return (EPERM);
>
> - if (lock)
> - KERNEL_LOCK();
> pledged = (p->p_p->ps_flags & PS_PLEDGE);
> if (pledged && (error = pledge_syscall(p, code, &tval))) {
> - if (!lock)
> - KERNEL_LOCK();
> + KERNEL_LOCK();
> error = pledge_fail(p, error, tval);
> KERNEL_UNLOCK();
> return (error);
> }
> + if (lock)
> + KERNEL_LOCK();
> error = (*callp->sy_call)(p, argp, retval);
> if (lock)
> KERNEL_UNLOCK();
> Index: sys/syscallargs.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.238
> diff -u -p -r1.238 syscallargs.h
> --- sys/syscallargs.h 27 Jun 2022 14:26:06 -0000 1.238
> +++ sys/syscallargs.h 28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/* $OpenBSD: syscallargs.h,v 1.238 2022/06/27 14:26:06 cheloha Exp $
> */
> +/* $OpenBSD$ */
>
> /*
> * System call argument lists.
> *
> * DO NOT EDIT-- this file is automatically generated.
> - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04
> mvs Exp
> + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05
> cheloha Exp
> */
>
> #ifdef syscallarg
>
> --
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF DDCC 0DFA 74AE 1524 E7EE
>