On 28/06/22(Tue) 18:17, Jeremie Courreges-Anglas wrote: > > Initially I just wandered in syscall_mi.h and found the locking scheme > super weird, even if technically correct. pledge_syscall() better be > safe to call without the kernel lock so I don't understand why we're > sometimes calling it with the kernel lock and sometimes not. > > ps_pledge is 64 bits so it's not possible to unset bits in an atomic > manner on all architectures. Even if we're only removing bits and there > is probably no way to see a completely garbage value, it makes sense to > just protect ps_pledge (and ps_execpledge) in the usual manner so that > we can unlock the syscall. The diff below protects the fields using > ps_mtx even though I initially used a dedicated ps_pledge_mtx. > unveil_destroy() needs to be moved after the critical section. > regress/sys/kern/pledge looks happy with this. The sys/syscall_mi.h > change can be committed in a separate step. > > Input and oks welcome.
This looks nice. I doubt there's any existing program where you can really test this. Even firefox and chromium should do things correctly. Maybe you should write a regress test that tries to break the kernel. > Index: arch/amd64/amd64/vmm.c > =================================================================== > RCS file: /home/cvs/src/sys/arch/amd64/amd64/vmm.c,v > retrieving revision 1.315 > diff -u -p -r1.315 vmm.c > --- arch/amd64/amd64/vmm.c 27 Jun 2022 15:12:14 -0000 1.315 > +++ arch/amd64/amd64/vmm.c 28 Jun 2022 13:54:25 -0000 > @@ -713,7 +713,7 @@ pledge_ioctl_vmm(struct proc *p, long co > case VMM_IOC_CREATE: > case VMM_IOC_INFO: > /* The "parent" process in vmd forks and manages VMs */ > - if (p->p_p->ps_pledge & PLEDGE_PROC) > + if (pledge_get(p->p_p) & PLEDGE_PROC) > return (0); > break; > case VMM_IOC_TERM: > @@ -1312,7 +1312,7 @@ vm_find(uint32_t id, struct vm **res) > * The managing vmm parent process can lookup all > * all VMs and is indicated by PLEDGE_PROC. > */ > - if (((p->p_p->ps_pledge & > + if (((pledge_get(p->p_p) & > (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) && > (vm->vm_creator_pid != p->p_p->ps_pid)) > return (pledge_fail(p, EPERM, PLEDGE_VMM)); > Index: kern/init_sysent.c > =================================================================== > RCS file: /home/cvs/src/sys/kern/init_sysent.c,v > retrieving revision 1.238 > diff -u -p -r1.238 init_sysent.c > --- kern/init_sysent.c 27 Jun 2022 14:26:05 -0000 1.238 > +++ kern/init_sysent.c 28 Jun 2022 15:18:25 -0000 > @@ -1,10 +1,10 @@ > -/* $OpenBSD: init_sysent.c,v 1.238 2022/06/27 14:26:05 cheloha Exp $ > */ > +/* $OpenBSD$ */ > > /* > * System call switch table. > * > * DO NOT EDIT-- this file is automatically generated. > - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 > mvs Exp > + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 > cheloha Exp > */ > > #include <sys/param.h> > @@ -248,7 +248,7 @@ const struct sysent sysent[] = { > sys_listen }, /* 106 = listen */ > { 4, s(struct sys_chflagsat_args), 0, > sys_chflagsat }, /* 107 = chflagsat */ > - { 2, s(struct sys_pledge_args), 0, > + { 2, s(struct sys_pledge_args), SY_NOLOCK | 0, > sys_pledge }, /* 108 = pledge */ > { 4, s(struct sys_ppoll_args), 0, > sys_ppoll }, /* 109 = ppoll */ > Index: kern/kern_event.c > =================================================================== > RCS file: /home/cvs/src/sys/kern/kern_event.c,v > retrieving revision 1.191 > diff -u -p -r1.191 kern_event.c > --- kern/kern_event.c 27 Jun 2022 13:35:21 -0000 1.191 > +++ kern/kern_event.c 28 Jun 2022 13:55:18 -0000 > @@ -331,7 +331,7 @@ filt_procattach(struct knote *kn) > int s; > > if ((curproc->p_p->ps_flags & PS_PLEDGE) && > - (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0) > + (pledge_get(curproc->p_p) & PLEDGE_PROC) == 0) > return pledge_fail(curproc, EPERM, PLEDGE_PROC); > > if (kn->kn_id > PID_MAX) > Index: kern/kern_pledge.c > =================================================================== > RCS file: /home/cvs/src/sys/kern/kern_pledge.c,v > retrieving revision 1.282 > diff -u -p -r1.282 kern_pledge.c > --- kern/kern_pledge.c 26 Jun 2022 06:11:49 -0000 1.282 > +++ kern/kern_pledge.c 28 Jun 2022 15:21:46 -0000 > @@ -21,6 +21,7 @@ > > #include <sys/mount.h> > #include <sys/proc.h> > +#include <sys/mutex.h> > #include <sys/fcntl.h> > #include <sys/file.h> > #include <sys/filedesc.h> > @@ -465,13 +466,26 @@ sys_pledge(struct proc *p, void *v, regi > struct process *pr = p->p_p; > uint64_t promises, execpromises; > int error; > + int unveil_cleanup = 0; > > + /* Check for any error in user input */ > if (SCARG(uap, promises)) { > error = parsepledges(p, "pledgereq", > SCARG(uap, promises), &promises); > if (error) > return (error); > + } > + if (SCARG(uap, execpromises)) { > + error = parsepledges(p, "pledgeexecreq", > + SCARG(uap, execpromises), &execpromises); > + if (error) > + return (error); > + } > + > + mtx_enter(&pr->ps_mtx); > > + /* Check for any error wrt current promises */ > + if (SCARG(uap, promises)) { > /* In "error" mode, ignore promise increase requests, > * but accept promise decrease requests */ > if (ISSET(pr->ps_flags, PS_PLEDGE) && > @@ -480,43 +494,55 @@ sys_pledge(struct proc *p, void *v, regi > > /* Only permit reductions */ > if (ISSET(pr->ps_flags, PS_PLEDGE) && > - (((promises | pr->ps_pledge) != pr->ps_pledge))) > + (((promises | pr->ps_pledge) != pr->ps_pledge))) { > + mtx_leave(&pr->ps_mtx); > return (EPERM); > + } > } > if (SCARG(uap, execpromises)) { > - error = parsepledges(p, "pledgeexecreq", > - SCARG(uap, execpromises), &execpromises); > - if (error) > - return (error); > - > /* Only permit reductions */ > if (ISSET(pr->ps_flags, PS_EXECPLEDGE) && > - (((execpromises | pr->ps_execpledge) != pr->ps_execpledge))) > + (((execpromises | pr->ps_execpledge) != > pr->ps_execpledge))) { > + mtx_leave(&pr->ps_mtx); > return (EPERM); > + } > } > > + /* Set up promises */ > if (SCARG(uap, promises)) { > pr->ps_pledge = promises; > atomic_setbits_int(&pr->ps_flags, PS_PLEDGE); > - /* > - * Kill off unveil and drop unveil vnode refs if we no > - * longer are holding any path-accessing pledge > - */ > + > if ((pr->ps_pledge & (PLEDGE_RPATH | PLEDGE_WPATH | > PLEDGE_CPATH | PLEDGE_DPATH | PLEDGE_TMPPATH | PLEDGE_EXEC | > PLEDGE_UNIX | PLEDGE_UNVEIL)) == 0) > - unveil_destroy(pr); > + unveil_cleanup = 1; > } > if (SCARG(uap, execpromises)) { > pr->ps_execpledge = execpromises; > atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE); > } > + > + mtx_leave(&pr->ps_mtx); > + > + if (unveil_cleanup) { > + /* > + * Kill off unveil and drop unveil vnode refs if we no > + * longer are holding any path-accessing pledge > + */ > + KERNEL_LOCK(); > + unveil_destroy(pr); > + KERNEL_UNLOCK(); > + } > + > return (0); > } > > int > pledge_syscall(struct proc *p, int code, uint64_t *tval) > { > + uint64_t pledge; > + > p->p_pledge_syscall = code; > *tval = 0; > > @@ -526,7 +552,8 @@ pledge_syscall(struct proc *p, int code, > if (pledge_syscalls[code] == PLEDGE_ALWAYS) > return (0); > > - if (p->p_p->ps_pledge & pledge_syscalls[code]) > + pledge = pledge_get(p->p_p);; > + if (pledge & pledge_syscalls[code]) > return (0); > > *tval = pledge_syscalls[code]; > @@ -549,7 +576,7 @@ pledge_fail(struct proc *p, int error, u > if (KTRPOINT(p, KTR_PLEDGE)) > ktrpledge(p, error, code, p->p_pledge_syscall); > #endif > - if (p->p_p->ps_pledge & PLEDGE_ERROR) > + if (pledge_get(p->p_p) & PLEDGE_ERROR) > return (ENOSYS); > > KERNEL_LOCK(); > @@ -583,7 +610,7 @@ pledge_namei(struct proc *p, struct name > if ((p->p_p->ps_flags & PS_PLEDGE) == 0 || > (p->p_p->ps_flags & PS_COREDUMP)) > return (0); > - pledge = p->p_p->ps_pledge; > + pledge = pledge_get(p->p_p); > > if (ni->ni_pledge == 0) > panic("pledge_namei: ni_pledge"); > @@ -704,11 +731,12 @@ pledge_namei(struct proc *p, struct name > * XXX > * The current hack for YP support in "getpw" > * is to enable some "inet" features until > - * next pledge call. Setting a bit in ps_pledge > - * is not safe with respect to multiple threads, > - * a very different approach is needed. > + * next pledge call. > */ > + mtx_enter(&p->p_p->ps_mtx); > p->p_p->ps_pledge |= PLEDGE_YPACTIVE; > + mtx_leave(&p->p_p->ps_mtx); > + pledge |= PLEDGE_YPACTIVE; > ni->ni_cnd.cn_flags |= BYPASSUNVEIL; > return (0); > } > @@ -770,7 +798,7 @@ pledge_recvfd(struct proc *p, struct fil > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0) > + if ((pledge_get(p->p_p) & PLEDGE_RECVFD) == 0) > return pledge_fail(p, EPERM, PLEDGE_RECVFD); > > switch (fp->f_type) { > @@ -798,7 +826,7 @@ pledge_sendfd(struct proc *p, struct fil > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0) > + if ((pledge_get(p->p_p) & PLEDGE_SENDFD) == 0) > return pledge_fail(p, EPERM, PLEDGE_SENDFD); > > switch (fp->f_type) { > @@ -826,7 +854,7 @@ pledge_sysctl(struct proc *p, int miblen > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - pledge = p->p_p->ps_pledge; > + pledge = pledge_get(p->p_p); > > if (new) > return pledge_fail(p, EFAULT, 0); > @@ -1023,7 +1051,7 @@ pledge_chown(struct proc *p, uid_t uid, > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if (p->p_p->ps_pledge & PLEDGE_CHOWNUID) > + if (pledge_get(p->p_p) & PLEDGE_CHOWNUID) > return (0); > > if (uid != -1 && uid != p->p_ucred->cr_uid) > @@ -1041,7 +1069,7 @@ pledge_adjtime(struct proc *p, const voi > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if ((p->p_p->ps_pledge & PLEDGE_SETTIME)) > + if ((pledge_get(p->p_p) & PLEDGE_SETTIME)) > return (0); > if (delta) > return (EPERM); > @@ -1054,7 +1082,7 @@ pledge_sendit(struct proc *p, const void > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS | > PLEDGE_YPACTIVE))) > + if ((pledge_get(p->p_p) & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS | > PLEDGE_YPACTIVE))) > return (0); /* may use address */ > if (to == NULL) > return (0); /* behaves just like write */ > @@ -1070,7 +1098,7 @@ pledge_ioctl(struct proc *p, long com, s > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - pledge = p->p_p->ps_pledge; > + pledge = pledge_get(p->p_p); > > /* > * The ioctl's which are always allowed. > @@ -1365,7 +1393,7 @@ pledge_sockopt(struct proc *p, int set, > > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - pledge = p->p_p->ps_pledge; > + pledge = pledge_get(p->p_p); > > /* Always allow these, which are too common to reject */ > switch (level) { > @@ -1515,7 +1543,7 @@ pledge_socket(struct proc *p, int domain > > if (!ISSET(p->p_p->ps_flags, PS_PLEDGE)) > return 0; > - pledge = p->p_p->ps_pledge; > + pledge = pledge_get(p->p_p); > > if (ISSET(state, SS_DNS)) { > if (ISSET(pledge, PLEDGE_DNS)) > @@ -1548,7 +1576,7 @@ pledge_flock(struct proc *p) > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > > - if ((p->p_p->ps_pledge & PLEDGE_FLOCK)) > + if ((pledge_get(p->p_p) & PLEDGE_FLOCK)) > return (0); > return (pledge_fail(p, EPERM, PLEDGE_FLOCK)); > } > @@ -1585,7 +1613,7 @@ pledge_fcntl(struct proc *p, int cmd) > { > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return (0); > - if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN) > + if ((pledge_get(p->p_p) & PLEDGE_PROC) == 0 && cmd == F_SETOWN) > return pledge_fail(p, EPERM, PLEDGE_PROC); > return (0); > } > @@ -1595,7 +1623,7 @@ pledge_kill(struct proc *p, pid_t pid) > { > if ((p->p_p->ps_flags & PS_PLEDGE) == 0) > return 0; > - if (p->p_p->ps_pledge & PLEDGE_PROC) > + if (pledge_get(p->p_p) & PLEDGE_PROC) > return 0; > if (pid == 0 || pid == p->p_p->ps_pid) > return 0; > @@ -1610,7 +1638,7 @@ pledge_protexec(struct proc *p, int prot > /* Before kbind(2) call, ld.so and crt may create EXEC mappings */ > if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0) > return 0; > - if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC)) > + if (!(pledge_get(p->p_p) & PLEDGE_PROTEXEC) && (prot & PROT_EXEC)) > return pledge_fail(p, EPERM, PLEDGE_PROTEXEC); > return 0; > } > Index: kern/syscalls.c > =================================================================== > RCS file: /home/cvs/src/sys/kern/syscalls.c,v > retrieving revision 1.236 > diff -u -p -r1.236 syscalls.c > --- kern/syscalls.c 16 May 2022 07:38:10 -0000 1.236 > +++ kern/syscalls.c 28 Jun 2022 15:18:25 -0000 > @@ -1,10 +1,10 @@ > -/* $OpenBSD: syscalls.c,v 1.236 2022/05/16 07:38:10 mvs Exp $ */ > +/* $OpenBSD$ */ > > /* > * System call names. > * > * DO NOT EDIT-- this file is automatically generated. > - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 > mvs Exp > + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 > cheloha Exp > */ > > const char *const syscallnames[] = { > Index: kern/syscalls.master > =================================================================== > RCS file: /home/cvs/src/sys/kern/syscalls.master,v > retrieving revision 1.225 > diff -u -p -r1.225 syscalls.master > --- kern/syscalls.master 27 Jun 2022 14:26:05 -0000 1.225 > +++ kern/syscalls.master 28 Jun 2022 12:47:19 -0000 > @@ -228,7 +228,7 @@ > 106 STD { int sys_listen(int s, int backlog); } > 107 STD { int sys_chflagsat(int fd, const char *path, \ > u_int flags, int atflags); } > -108 STD { int sys_pledge(const char *promises, \ > +108 STD NOLOCK { int sys_pledge(const char *promises, \ > const char *execpromises); } > 109 STD { int sys_ppoll(struct pollfd *fds, \ > u_int nfds, const struct timespec *ts, \ > Index: sys/proc.h > =================================================================== > RCS file: /home/cvs/src/sys/sys/proc.h,v > retrieving revision 1.331 > diff -u -p -r1.331 proc.h > --- sys/proc.h 27 Jun 2022 14:26:05 -0000 1.331 > +++ sys/proc.h 28 Jun 2022 15:23:12 -0000 > @@ -231,8 +231,8 @@ struct process { > > u_int32_t ps_acflag; /* Accounting flags. */ > > - uint64_t ps_pledge; > - uint64_t ps_execpledge; > + uint64_t ps_pledge; /* [m] pledge promises */ > + uint64_t ps_execpledge; /* [m] execpledge promises */ > > int64_t ps_kbind_cookie; /* [m] */ > u_long ps_kbind_addr; /* [m] */ > @@ -566,6 +566,19 @@ refreshcreds(struct proc *p) > if (pr->ps_ucred != p->p_ucred) > dorefreshcreds(pr, p); > } > + > +static inline uint64_t > +pledge_get(struct process *pr) > +{ > + uint64_t promises; > + > + mtx_enter(&pr->ps_mtx); > + promises = pr->ps_pledge; > + mtx_leave(&pr->ps_mtx); > + > + return promises; > +} > + > > enum single_thread_mode { > SINGLE_SUSPEND, /* other threads to stop wherever they are */ > Index: sys/syscall.h > =================================================================== > RCS file: /home/cvs/src/sys/sys/syscall.h,v > retrieving revision 1.235 > diff -u -p -r1.235 syscall.h > --- sys/syscall.h 27 Jun 2022 14:26:06 -0000 1.235 > +++ sys/syscall.h 28 Jun 2022 15:18:25 -0000 > @@ -1,10 +1,10 @@ > -/* $OpenBSD: syscall.h,v 1.235 2022/06/27 14:26:06 cheloha Exp $ */ > +/* $OpenBSD$ */ > > /* > * System call numbers. > * > * DO NOT EDIT-- this file is automatically generated. > - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 > mvs Exp > + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 > cheloha Exp > */ > > /* syscall: "syscall" ret: "int" args: "int" "..." */ > Index: sys/syscall_mi.h > =================================================================== > RCS file: /home/cvs/src/sys/sys/syscall_mi.h,v > retrieving revision 1.25 > diff -u -p -r1.25 syscall_mi.h > --- sys/syscall_mi.h 21 Jan 2020 16:16:23 -0000 1.25 > +++ sys/syscall_mi.h 28 Jun 2022 12:43:07 -0000 > @@ -89,16 +89,15 @@ mi_syscall(struct proc *p, register_t co > uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial)) > return (EPERM); > > - if (lock) > - KERNEL_LOCK(); > pledged = (p->p_p->ps_flags & PS_PLEDGE); > if (pledged && (error = pledge_syscall(p, code, &tval))) { > - if (!lock) > - KERNEL_LOCK(); > + KERNEL_LOCK(); > error = pledge_fail(p, error, tval); > KERNEL_UNLOCK(); > return (error); > } > + if (lock) > + KERNEL_LOCK(); > error = (*callp->sy_call)(p, argp, retval); > if (lock) > KERNEL_UNLOCK(); > Index: sys/syscallargs.h > =================================================================== > RCS file: /home/cvs/src/sys/sys/syscallargs.h,v > retrieving revision 1.238 > diff -u -p -r1.238 syscallargs.h > --- sys/syscallargs.h 27 Jun 2022 14:26:06 -0000 1.238 > +++ sys/syscallargs.h 28 Jun 2022 15:18:25 -0000 > @@ -1,10 +1,10 @@ > -/* $OpenBSD: syscallargs.h,v 1.238 2022/06/27 14:26:06 cheloha Exp $ > */ > +/* $OpenBSD$ */ > > /* > * System call argument lists. > * > * DO NOT EDIT-- this file is automatically generated. > - * created from; OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 > mvs Exp > + * created from; OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 > cheloha Exp > */ > > #ifdef syscallarg > > -- > jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF DDCC 0DFA 74AE 1524 E7EE >