On 28/06/22(Tue) 18:17, Jeremie Courreges-Anglas wrote:
> 
> Initially I just wandered in syscall_mi.h and found the locking scheme
> super weird, even if technically correct.  pledge_syscall() better be
> safe to call without the kernel lock so I don't understand why we're
> sometimes calling it with the kernel lock and sometimes not.
> 
> ps_pledge is 64 bits so it's not possible to unset bits in an atomic
> manner on all architectures.  Even if we're only removing bits and there
> is probably no way to see a completely garbage value, it makes sense to
> just protect ps_pledge (and ps_execpledge) in the usual manner so that
> we can unlock the syscall.  The diff below protects the fields using
> ps_mtx even though I initially used a dedicated ps_pledge_mtx.
> unveil_destroy() needs to be moved after the critical section.
> regress/sys/kern/pledge looks happy with this.  The sys/syscall_mi.h
> change can be committed in a separate step.
> 
> Input and oks welcome.

This looks nice.  I doubt there's any existing program where you can
really test this.  Even firefox and chromium should do things
correctly.

Maybe you should write a regress test that tries to break the kernel.

> Index: arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /home/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.315
> diff -u -p -r1.315 vmm.c
> --- arch/amd64/amd64/vmm.c    27 Jun 2022 15:12:14 -0000      1.315
> +++ arch/amd64/amd64/vmm.c    28 Jun 2022 13:54:25 -0000
> @@ -713,7 +713,7 @@ pledge_ioctl_vmm(struct proc *p, long co
>       case VMM_IOC_CREATE:
>       case VMM_IOC_INFO:
>               /* The "parent" process in vmd forks and manages VMs */
> -             if (p->p_p->ps_pledge & PLEDGE_PROC)
> +             if (pledge_get(p->p_p) & PLEDGE_PROC)
>                       return (0);
>               break;
>       case VMM_IOC_TERM:
> @@ -1312,7 +1312,7 @@ vm_find(uint32_t id, struct vm **res)
>                        * The managing vmm parent process can lookup all
>                        * all VMs and is indicated by PLEDGE_PROC.
>                        */
> -                     if (((p->p_p->ps_pledge &
> +                     if (((pledge_get(p->p_p) &
>                           (PLEDGE_VMM | PLEDGE_PROC)) == PLEDGE_VMM) &&
>                           (vm->vm_creator_pid != p->p_p->ps_pid))
>                               return (pledge_fail(p, EPERM, PLEDGE_VMM));
> Index: kern/init_sysent.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 init_sysent.c
> --- kern/init_sysent.c        27 Jun 2022 14:26:05 -0000      1.238
> +++ kern/init_sysent.c        28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: init_sysent.c,v 1.238 2022/06/27 14:26:05 cheloha Exp $       
> */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call switch table.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  #include <sys/param.h>
> @@ -248,7 +248,7 @@ const struct sysent sysent[] = {
>           sys_listen },                       /* 106 = listen */
>       { 4, s(struct sys_chflagsat_args), 0,
>           sys_chflagsat },                    /* 107 = chflagsat */
> -     { 2, s(struct sys_pledge_args), 0,
> +     { 2, s(struct sys_pledge_args), SY_NOLOCK | 0,
>           sys_pledge },                       /* 108 = pledge */
>       { 4, s(struct sys_ppoll_args), 0,
>           sys_ppoll },                        /* 109 = ppoll */
> Index: kern/kern_event.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_event.c,v
> retrieving revision 1.191
> diff -u -p -r1.191 kern_event.c
> --- kern/kern_event.c 27 Jun 2022 13:35:21 -0000      1.191
> +++ kern/kern_event.c 28 Jun 2022 13:55:18 -0000
> @@ -331,7 +331,7 @@ filt_procattach(struct knote *kn)
>       int s;
>  
>       if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
> -         (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
> +         (pledge_get(curproc->p_p) & PLEDGE_PROC) == 0)
>               return pledge_fail(curproc, EPERM, PLEDGE_PROC);
>  
>       if (kn->kn_id > PID_MAX)
> Index: kern/kern_pledge.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.282
> diff -u -p -r1.282 kern_pledge.c
> --- kern/kern_pledge.c        26 Jun 2022 06:11:49 -0000      1.282
> +++ kern/kern_pledge.c        28 Jun 2022 15:21:46 -0000
> @@ -21,6 +21,7 @@
>  
>  #include <sys/mount.h>
>  #include <sys/proc.h>
> +#include <sys/mutex.h>
>  #include <sys/fcntl.h>
>  #include <sys/file.h>
>  #include <sys/filedesc.h>
> @@ -465,13 +466,26 @@ sys_pledge(struct proc *p, void *v, regi
>       struct process *pr = p->p_p;
>       uint64_t promises, execpromises;
>       int error;
> +     int unveil_cleanup = 0;
>  
> +     /* Check for any error in user input */
>       if (SCARG(uap, promises)) {
>               error = parsepledges(p, "pledgereq",
>                   SCARG(uap, promises), &promises);
>               if (error)
>                       return (error);
> +     }
> +     if (SCARG(uap, execpromises)) {
> +             error = parsepledges(p, "pledgeexecreq",
> +                 SCARG(uap, execpromises), &execpromises);
> +             if (error)
> +                     return (error);
> +     }
> +
> +     mtx_enter(&pr->ps_mtx);
>  
> +     /* Check for any error wrt current promises */
> +     if (SCARG(uap, promises)) {
>               /* In "error" mode, ignore promise increase requests,
>                * but accept promise decrease requests */
>               if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> @@ -480,43 +494,55 @@ sys_pledge(struct proc *p, void *v, regi
>  
>               /* Only permit reductions */
>               if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> -                 (((promises | pr->ps_pledge) != pr->ps_pledge)))
> +                 (((promises | pr->ps_pledge) != pr->ps_pledge))) {
> +                     mtx_leave(&pr->ps_mtx);
>                       return (EPERM);
> +             }
>       }
>       if (SCARG(uap, execpromises)) {
> -             error = parsepledges(p, "pledgeexecreq",
> -                 SCARG(uap, execpromises), &execpromises);
> -             if (error)
> -                     return (error);
> -
>               /* Only permit reductions */
>               if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
> -                 (((execpromises | pr->ps_execpledge) != pr->ps_execpledge)))
> +                 (((execpromises | pr->ps_execpledge) != 
> pr->ps_execpledge))) {
> +                     mtx_leave(&pr->ps_mtx);
>                       return (EPERM);
> +             }
>       }
>  
> +     /* Set up promises */
>       if (SCARG(uap, promises)) {
>               pr->ps_pledge = promises;
>               atomic_setbits_int(&pr->ps_flags, PS_PLEDGE);
> -             /*
> -              * Kill off unveil and drop unveil vnode refs if we no
> -              * longer are holding any path-accessing pledge
> -              */
> +
>               if ((pr->ps_pledge & (PLEDGE_RPATH | PLEDGE_WPATH |
>                   PLEDGE_CPATH | PLEDGE_DPATH | PLEDGE_TMPPATH | PLEDGE_EXEC |
>                   PLEDGE_UNIX | PLEDGE_UNVEIL)) == 0)
> -                     unveil_destroy(pr);
> +                     unveil_cleanup = 1;
>       }
>       if (SCARG(uap, execpromises)) {
>               pr->ps_execpledge = execpromises;
>               atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
>       }
> +
> +     mtx_leave(&pr->ps_mtx);
> +
> +     if (unveil_cleanup) {
> +             /*
> +              * Kill off unveil and drop unveil vnode refs if we no
> +              * longer are holding any path-accessing pledge
> +              */
> +             KERNEL_LOCK();
> +             unveil_destroy(pr);
> +             KERNEL_UNLOCK();
> +     }
> +
>       return (0);
>  }
>  
>  int
>  pledge_syscall(struct proc *p, int code, uint64_t *tval)
>  {
> +     uint64_t pledge;
> +
>       p->p_pledge_syscall = code;
>       *tval = 0;
>  
> @@ -526,7 +552,8 @@ pledge_syscall(struct proc *p, int code,
>       if (pledge_syscalls[code] == PLEDGE_ALWAYS)
>               return (0);
>  
> -     if (p->p_p->ps_pledge & pledge_syscalls[code])
> +     pledge = pledge_get(p->p_p);;
> +     if (pledge & pledge_syscalls[code])
>               return (0);
>  
>       *tval = pledge_syscalls[code];
> @@ -549,7 +576,7 @@ pledge_fail(struct proc *p, int error, u
>       if (KTRPOINT(p, KTR_PLEDGE))
>               ktrpledge(p, error, code, p->p_pledge_syscall);
>  #endif
> -     if (p->p_p->ps_pledge & PLEDGE_ERROR)
> +     if (pledge_get(p->p_p) & PLEDGE_ERROR)
>               return (ENOSYS);
>  
>       KERNEL_LOCK();
> @@ -583,7 +610,7 @@ pledge_namei(struct proc *p, struct name
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
>           (p->p_p->ps_flags & PS_COREDUMP))
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = pledge_get(p->p_p);
>  
>       if (ni->ni_pledge == 0)
>               panic("pledge_namei: ni_pledge");
> @@ -704,11 +731,12 @@ pledge_namei(struct proc *p, struct name
>                                * XXX
>                                * The current hack for YP support in "getpw"
>                                * is to enable some "inet" features until
> -                              * next pledge call.  Setting a bit in ps_pledge
> -                              * is not safe with respect to multiple threads,
> -                              * a very different approach is needed.
> +                              * next pledge call.
>                                */
> +                             mtx_enter(&p->p_p->ps_mtx);
>                               p->p_p->ps_pledge |= PLEDGE_YPACTIVE;
> +                             mtx_leave(&p->p_p->ps_mtx);
> +                             pledge |= PLEDGE_YPACTIVE;
>                               ni->ni_cnd.cn_flags |= BYPASSUNVEIL;
>                               return (0);
>                       }
> @@ -770,7 +798,7 @@ pledge_recvfd(struct proc *p, struct fil
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     if ((p->p_p->ps_pledge & PLEDGE_RECVFD) == 0)
> +     if ((pledge_get(p->p_p) & PLEDGE_RECVFD) == 0)
>               return pledge_fail(p, EPERM, PLEDGE_RECVFD);
>  
>       switch (fp->f_type) {
> @@ -798,7 +826,7 @@ pledge_sendfd(struct proc *p, struct fil
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     if ((p->p_p->ps_pledge & PLEDGE_SENDFD) == 0)
> +     if ((pledge_get(p->p_p) & PLEDGE_SENDFD) == 0)
>               return pledge_fail(p, EPERM, PLEDGE_SENDFD);
>  
>       switch (fp->f_type) {
> @@ -826,7 +854,7 @@ pledge_sysctl(struct proc *p, int miblen
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = pledge_get(p->p_p);
>  
>       if (new)
>               return pledge_fail(p, EFAULT, 0);
> @@ -1023,7 +1051,7 @@ pledge_chown(struct proc *p, uid_t uid, 
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
>  
> -     if (p->p_p->ps_pledge & PLEDGE_CHOWNUID)
> +     if (pledge_get(p->p_p) & PLEDGE_CHOWNUID)
>               return (0);
>  
>       if (uid != -1 && uid != p->p_ucred->cr_uid)
> @@ -1041,7 +1069,7 @@ pledge_adjtime(struct proc *p, const voi
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
>  
> -     if ((p->p_p->ps_pledge & PLEDGE_SETTIME))
> +     if ((pledge_get(p->p_p) & PLEDGE_SETTIME))
>               return (0);
>       if (delta)
>               return (EPERM);
> @@ -1054,7 +1082,7 @@ pledge_sendit(struct proc *p, const void
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
>  
> -     if ((p->p_p->ps_pledge & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS | 
> PLEDGE_YPACTIVE)))
> +     if ((pledge_get(p->p_p) & (PLEDGE_INET | PLEDGE_UNIX | PLEDGE_DNS | 
> PLEDGE_YPACTIVE)))
>               return (0);             /* may use address */
>       if (to == NULL)
>               return (0);             /* behaves just like write */
> @@ -1070,7 +1098,7 @@ pledge_ioctl(struct proc *p, long com, s
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = pledge_get(p->p_p);
>  
>       /*
>        * The ioctl's which are always allowed.
> @@ -1365,7 +1393,7 @@ pledge_sockopt(struct proc *p, int set, 
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = pledge_get(p->p_p);
>  
>       /* Always allow these, which are too common to reject */
>       switch (level) {
> @@ -1515,7 +1543,7 @@ pledge_socket(struct proc *p, int domain
>  
>       if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
>               return 0;
> -     pledge = p->p_p->ps_pledge;
> +     pledge = pledge_get(p->p_p);
>  
>       if (ISSET(state, SS_DNS)) {
>               if (ISSET(pledge, PLEDGE_DNS))
> @@ -1548,7 +1576,7 @@ pledge_flock(struct proc *p)
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
>  
> -     if ((p->p_p->ps_pledge & PLEDGE_FLOCK))
> +     if ((pledge_get(p->p_p) & PLEDGE_FLOCK))
>               return (0);
>       return (pledge_fail(p, EPERM, PLEDGE_FLOCK));
>  }
> @@ -1585,7 +1613,7 @@ pledge_fcntl(struct proc *p, int cmd)
>  {
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     if ((p->p_p->ps_pledge & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
> +     if ((pledge_get(p->p_p) & PLEDGE_PROC) == 0 && cmd == F_SETOWN)
>               return pledge_fail(p, EPERM, PLEDGE_PROC);
>       return (0);
>  }
> @@ -1595,7 +1623,7 @@ pledge_kill(struct proc *p, pid_t pid)
>  {
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return 0;
> -     if (p->p_p->ps_pledge & PLEDGE_PROC)
> +     if (pledge_get(p->p_p) & PLEDGE_PROC)
>               return 0;
>       if (pid == 0 || pid == p->p_p->ps_pid)
>               return 0;
> @@ -1610,7 +1638,7 @@ pledge_protexec(struct proc *p, int prot
>       /* Before kbind(2) call, ld.so and crt may create EXEC mappings */
>       if (p->p_p->ps_kbind_addr == 0 && p->p_p->ps_kbind_cookie == 0)
>               return 0;
> -     if (!(p->p_p->ps_pledge & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
> +     if (!(pledge_get(p->p_p) & PLEDGE_PROTEXEC) && (prot & PROT_EXEC))
>               return pledge_fail(p, EPERM, PLEDGE_PROTEXEC);
>       return 0;
>  }
> Index: kern/syscalls.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.236
> diff -u -p -r1.236 syscalls.c
> --- kern/syscalls.c   16 May 2022 07:38:10 -0000      1.236
> +++ kern/syscalls.c   28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: syscalls.c,v 1.236 2022/05/16 07:38:10 mvs Exp $      */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call names.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  const char *const syscallnames[] = {
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.225
> diff -u -p -r1.225 syscalls.master
> --- kern/syscalls.master      27 Jun 2022 14:26:05 -0000      1.225
> +++ kern/syscalls.master      28 Jun 2022 12:47:19 -0000
> @@ -228,7 +228,7 @@
>  106  STD             { int sys_listen(int s, int backlog); }
>  107  STD             { int sys_chflagsat(int fd, const char *path, \
>                           u_int flags, int atflags); }
> -108  STD             { int sys_pledge(const char *promises, \
> +108  STD NOLOCK      { int sys_pledge(const char *promises, \
>                           const char *execpromises); }
>  109  STD             { int sys_ppoll(struct pollfd *fds, \
>                           u_int nfds, const struct timespec *ts, \
> Index: sys/proc.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/proc.h,v
> retrieving revision 1.331
> diff -u -p -r1.331 proc.h
> --- sys/proc.h        27 Jun 2022 14:26:05 -0000      1.331
> +++ sys/proc.h        28 Jun 2022 15:23:12 -0000
> @@ -231,8 +231,8 @@ struct process {
>  
>       u_int32_t       ps_acflag;      /* Accounting flags. */
>  
> -     uint64_t ps_pledge;
> -     uint64_t ps_execpledge;
> +     uint64_t ps_pledge;             /* [m] pledge promises */
> +     uint64_t ps_execpledge;         /* [m] execpledge promises */
>  
>       int64_t ps_kbind_cookie;        /* [m] */
>       u_long  ps_kbind_addr;          /* [m] */
> @@ -566,6 +566,19 @@ refreshcreds(struct proc *p)
>       if (pr->ps_ucred != p->p_ucred)
>               dorefreshcreds(pr, p);
>  }
> +
> +static inline uint64_t
> +pledge_get(struct process *pr)
> +{
> +     uint64_t promises;
> +
> +     mtx_enter(&pr->ps_mtx);
> +     promises = pr->ps_pledge;
> +     mtx_leave(&pr->ps_mtx);
> +
> +     return promises;
> +}
> +
>  
>  enum single_thread_mode {
>       SINGLE_SUSPEND,         /* other threads to stop wherever they are */
> Index: sys/syscall.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.235
> diff -u -p -r1.235 syscall.h
> --- sys/syscall.h     27 Jun 2022 14:26:06 -0000      1.235
> +++ sys/syscall.h     28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: syscall.h,v 1.235 2022/06/27 14:26:06 cheloha Exp $   */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call numbers.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  /* syscall: "syscall" ret: "int" args: "int" "..." */
> Index: sys/syscall_mi.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscall_mi.h,v
> retrieving revision 1.25
> diff -u -p -r1.25 syscall_mi.h
> --- sys/syscall_mi.h  21 Jan 2020 16:16:23 -0000      1.25
> +++ sys/syscall_mi.h  28 Jun 2022 12:43:07 -0000
> @@ -89,16 +89,15 @@ mi_syscall(struct proc *p, register_t co
>           uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial))
>               return (EPERM);
>  
> -     if (lock)
> -             KERNEL_LOCK();
>       pledged = (p->p_p->ps_flags & PS_PLEDGE);
>       if (pledged && (error = pledge_syscall(p, code, &tval))) {
> -             if (!lock)
> -                     KERNEL_LOCK();
> +             KERNEL_LOCK();
>               error = pledge_fail(p, error, tval);
>               KERNEL_UNLOCK();
>               return (error);
>       }
> +     if (lock)
> +             KERNEL_LOCK();
>       error = (*callp->sy_call)(p, argp, retval);
>       if (lock)
>               KERNEL_UNLOCK();
> Index: sys/syscallargs.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.238
> diff -u -p -r1.238 syscallargs.h
> --- sys/syscallargs.h 27 Jun 2022 14:26:06 -0000      1.238
> +++ sys/syscallargs.h 28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: syscallargs.h,v 1.238 2022/06/27 14:26:06 cheloha Exp $       
> */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call argument lists.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  #ifdef       syscallarg
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 

Reply via email to