Looks mostly good to me, except for one little nit and one question mark...

> Op 28-06-2022 23:12 schreef Jeremie Courreges-Anglas <j...@wxcvbn.org>:
> 
>  
> On Tue, Jun 28 2022, Martin Pieuchot <m...@openbsd.org> wrote:
> > On 28/06/22(Tue) 18:17, Jeremie Courreges-Anglas wrote:
> >> 
> >> Initially I just wandered in syscall_mi.h and found the locking scheme
> >> super weird, even if technically correct.  pledge_syscall() better be
> >> safe to call without the kernel lock so I don't understand why we're
> >> sometimes calling it with the kernel lock and sometimes not.
> >> 
> >> ps_pledge is 64 bits so it's not possible to unset bits in an atomic
> >> manner on all architectures.  Even if we're only removing bits and there
> >> is probably no way to see a completely garbage value, it makes sense to
> >> just protect ps_pledge (and ps_execpledge) in the usual manner so that
> >> we can unlock the syscall.  The diff below protects the fields using
> >> ps_mtx even though I initially used a dedicated ps_pledge_mtx.
> >> unveil_destroy() needs to be moved after the critical section.
> >> regress/sys/kern/pledge looks happy with this.  The sys/syscall_mi.h
> >> change can be committed in a separate step.
> >> 
> >> Input and oks welcome.
> >
> > This looks nice.  I doubt there's any existing program where you can
> > really test this.  Even firefox and chromium should do things
> > correctly.
> >
> > Maybe you should write a regress test that tries to break the kernel.
> 
> That would need some pondering, I'm not sure what kind of breakage
> you're envisioning.  The obvious check to perform would be to ensure
> that we're not increasing the promises but the code already does it.
> Hmm.
> 
> For now, here's the latest diff based on input from the hackroom, which
> basically amounts to:
> - don't take the mutex to read the value.  Even on 32 bits machines that
>   can't write an 8 byte value in a single go we don't expect garbage to
>   on a read crossing a write, since we only remove bits and never add
>   some.
> - use READ_ONCE when we're storing the value in a local variable, to
>   prevent possible reloading from memory.  In the current code it would
>   not be a problem but ensuring that we do not reload from memory is
>   just good practice for stuff that can be modified concurrently.
>   (This part can be committed in a second step, just like the
>   syscall_mi.h change.)
> 
> ok?
> 
> 
> Index: kern/init_sysent.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/init_sysent.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 init_sysent.c
> --- kern/init_sysent.c        27 Jun 2022 14:26:05 -0000      1.238
> +++ kern/init_sysent.c        28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: init_sysent.c,v 1.238 2022/06/27 14:26:05 cheloha Exp $       
> */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call switch table.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  #include <sys/param.h>
> @@ -248,7 +248,7 @@ const struct sysent sysent[] = {
>           sys_listen },                       /* 106 = listen */
>       { 4, s(struct sys_chflagsat_args), 0,
>           sys_chflagsat },                    /* 107 = chflagsat */
> -     { 2, s(struct sys_pledge_args), 0,
> +     { 2, s(struct sys_pledge_args), SY_NOLOCK | 0,
>           sys_pledge },                       /* 108 = pledge */
>       { 4, s(struct sys_ppoll_args), 0,
>           sys_ppoll },                        /* 109 = ppoll */
> Index: kern/kern_pledge.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/kern_pledge.c,v
> retrieving revision 1.282
> diff -u -p -r1.282 kern_pledge.c
> --- kern/kern_pledge.c        26 Jun 2022 06:11:49 -0000      1.282
> +++ kern/kern_pledge.c        28 Jun 2022 17:00:51 -0000
> @@ -21,6 +21,7 @@
>  
>  #include <sys/mount.h>
>  #include <sys/proc.h>
> +#include <sys/mutex.h>
>  #include <sys/fcntl.h>
>  #include <sys/file.h>
>  #include <sys/filedesc.h>
> @@ -465,13 +466,26 @@ sys_pledge(struct proc *p, void *v, regi
>       struct process *pr = p->p_p;
>       uint64_t promises, execpromises;
>       int error;
> +     int unveil_cleanup = 0;
>  
> +     /* Check for any error in user input */
>       if (SCARG(uap, promises)) {
>               error = parsepledges(p, "pledgereq",
>                   SCARG(uap, promises), &promises);
>               if (error)
>                       return (error);
> +     }
> +     if (SCARG(uap, execpromises)) {
> +             error = parsepledges(p, "pledgeexecreq",
> +                 SCARG(uap, execpromises), &execpromises);
> +             if (error)
> +                     return (error);
> +     }
> +
> +     mtx_enter(&pr->ps_mtx);
>  
> +     /* Check for any error wrt current promises */
> +     if (SCARG(uap, promises)) {
>               /* In "error" mode, ignore promise increase requests,
>                * but accept promise decrease requests */
>               if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> @@ -480,37 +494,47 @@ sys_pledge(struct proc *p, void *v, regi
>  
>               /* Only permit reductions */
>               if (ISSET(pr->ps_flags, PS_PLEDGE) &&
> -                 (((promises | pr->ps_pledge) != pr->ps_pledge)))
> +                 (((promises | pr->ps_pledge) != pr->ps_pledge))) {
> +                     mtx_leave(&pr->ps_mtx);
>                       return (EPERM);
> +             }
>       }
>       if (SCARG(uap, execpromises)) {
> -             error = parsepledges(p, "pledgeexecreq",
> -                 SCARG(uap, execpromises), &execpromises);
> -             if (error)
> -                     return (error);
> -
>               /* Only permit reductions */
>               if (ISSET(pr->ps_flags, PS_EXECPLEDGE) &&
> -                 (((execpromises | pr->ps_execpledge) != pr->ps_execpledge)))
> +                 (((execpromises | pr->ps_execpledge) != 
> pr->ps_execpledge))) {
> +                     mtx_leave(&pr->ps_mtx);
>                       return (EPERM);
> +             }
>       }
>  
> +     /* Set up promises */
>       if (SCARG(uap, promises)) {
>               pr->ps_pledge = promises;
>               atomic_setbits_int(&pr->ps_flags, PS_PLEDGE);
> -             /*
> -              * Kill off unveil and drop unveil vnode refs if we no
> -              * longer are holding any path-accessing pledge
> -              */
> +
>               if ((pr->ps_pledge & (PLEDGE_RPATH | PLEDGE_WPATH |
>                   PLEDGE_CPATH | PLEDGE_DPATH | PLEDGE_TMPPATH | PLEDGE_EXEC |
>                   PLEDGE_UNIX | PLEDGE_UNVEIL)) == 0)
> -                     unveil_destroy(pr);
> +                     unveil_cleanup = 1;
>       }
>       if (SCARG(uap, execpromises)) {
>               pr->ps_execpledge = execpromises;
>               atomic_setbits_int(&pr->ps_flags, PS_EXECPLEDGE);
>       }
> +
> +     mtx_leave(&pr->ps_mtx);
> +
> +     if (unveil_cleanup) {
> +             /*
> +              * Kill off unveil and drop unveil vnode refs if we no
> +              * longer are holding any path-accessing pledge
> +              */
> +             KERNEL_LOCK();
> +             unveil_destroy(pr);
> +             KERNEL_UNLOCK();
> +     }
> +
>       return (0);
>  }
>  
> @@ -583,7 +607,7 @@ pledge_namei(struct proc *p, struct name
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0 ||
>           (p->p_p->ps_flags & PS_COREDUMP))
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = READ_ONCE(p->p_p->ps_pledge);
>  
>       if (ni->ni_pledge == 0)
>               panic("pledge_namei: ni_pledge");
> @@ -704,11 +728,12 @@ pledge_namei(struct proc *p, struct name
>                                * XXX
>                                * The current hack for YP support in "getpw"
>                                * is to enable some "inet" features until
> -                              * next pledge call.  Setting a bit in ps_pledge
> -                              * is not safe with respect to multiple threads,
> -                              * a very different approach is needed.
> +                              * next pledge call.
>                                */
> +                             mtx_enter(&p->p_p->ps_mtx);
>                               p->p_p->ps_pledge |= PLEDGE_YPACTIVE;
> +                             mtx_leave(&p->p_p->ps_mtx);
> +                             pledge |= PLEDGE_YPACTIVE;

This appears to be a "dead store" since you're modifying a local variable that 
isn't used between here
and the return two lines below.

>                               ni->ni_cnd.cn_flags |= BYPASSUNVEIL;

Is it safe to modify these flags without holding the kernel lock?

>                               return (0);
>                       }
> @@ -826,7 +851,7 @@ pledge_sysctl(struct proc *p, int miblen
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = READ_ONCE(p->p_p->ps_pledge);
>  
>       if (new)
>               return pledge_fail(p, EFAULT, 0);
> @@ -1070,7 +1095,7 @@ pledge_ioctl(struct proc *p, long com, s
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = READ_ONCE(p->p_p->ps_pledge);
>  
>       /*
>        * The ioctl's which are always allowed.
> @@ -1365,7 +1390,7 @@ pledge_sockopt(struct proc *p, int set, 
>  
>       if ((p->p_p->ps_flags & PS_PLEDGE) == 0)
>               return (0);
> -     pledge = p->p_p->ps_pledge;
> +     pledge = READ_ONCE(p->p_p->ps_pledge);
>  
>       /* Always allow these, which are too common to reject */
>       switch (level) {
> @@ -1515,7 +1540,7 @@ pledge_socket(struct proc *p, int domain
>  
>       if (!ISSET(p->p_p->ps_flags, PS_PLEDGE))
>               return 0;
> -     pledge = p->p_p->ps_pledge;
> +     pledge = READ_ONCE(p->p_p->ps_pledge);
>  
>       if (ISSET(state, SS_DNS)) {
>               if (ISSET(pledge, PLEDGE_DNS))
> Index: kern/syscalls.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/syscalls.c,v
> retrieving revision 1.236
> diff -u -p -r1.236 syscalls.c
> --- kern/syscalls.c   16 May 2022 07:38:10 -0000      1.236
> +++ kern/syscalls.c   28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: syscalls.c,v 1.236 2022/05/16 07:38:10 mvs Exp $      */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call names.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  const char *const syscallnames[] = {
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.225
> diff -u -p -r1.225 syscalls.master
> --- kern/syscalls.master      27 Jun 2022 14:26:05 -0000      1.225
> +++ kern/syscalls.master      28 Jun 2022 12:47:19 -0000
> @@ -228,7 +228,7 @@
>  106  STD             { int sys_listen(int s, int backlog); }
>  107  STD             { int sys_chflagsat(int fd, const char *path, \
>                           u_int flags, int atflags); }
> -108  STD             { int sys_pledge(const char *promises, \
> +108  STD NOLOCK      { int sys_pledge(const char *promises, \
>                           const char *execpromises); }
>  109  STD             { int sys_ppoll(struct pollfd *fds, \
>                           u_int nfds, const struct timespec *ts, \
> Index: sys/proc.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/proc.h,v
> retrieving revision 1.331
> diff -u -p -r1.331 proc.h
> --- sys/proc.h        27 Jun 2022 14:26:05 -0000      1.331
> +++ sys/proc.h        28 Jun 2022 16:55:24 -0000
> @@ -231,8 +231,8 @@ struct process {
>  
>       u_int32_t       ps_acflag;      /* Accounting flags. */
>  
> -     uint64_t ps_pledge;
> -     uint64_t ps_execpledge;
> +     uint64_t ps_pledge;             /* [m] pledge promises */
> +     uint64_t ps_execpledge;         /* [m] execpledge promises */
>  
>       int64_t ps_kbind_cookie;        /* [m] */
>       u_long  ps_kbind_addr;          /* [m] */
> Index: sys/syscall.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscall.h,v
> retrieving revision 1.235
> diff -u -p -r1.235 syscall.h
> --- sys/syscall.h     27 Jun 2022 14:26:06 -0000      1.235
> +++ sys/syscall.h     28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: syscall.h,v 1.235 2022/06/27 14:26:06 cheloha Exp $   */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call numbers.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  /* syscall: "syscall" ret: "int" args: "int" "..." */
> Index: sys/syscall_mi.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscall_mi.h,v
> retrieving revision 1.25
> diff -u -p -r1.25 syscall_mi.h
> --- sys/syscall_mi.h  21 Jan 2020 16:16:23 -0000      1.25
> +++ sys/syscall_mi.h  28 Jun 2022 12:43:07 -0000
> @@ -89,16 +89,15 @@ mi_syscall(struct proc *p, register_t co
>           uvm_map_inentry_pc, p->p_vmspace->vm_map.wserial))
>               return (EPERM);
>  
> -     if (lock)
> -             KERNEL_LOCK();
>       pledged = (p->p_p->ps_flags & PS_PLEDGE);
>       if (pledged && (error = pledge_syscall(p, code, &tval))) {
> -             if (!lock)
> -                     KERNEL_LOCK();
> +             KERNEL_LOCK();
>               error = pledge_fail(p, error, tval);
>               KERNEL_UNLOCK();
>               return (error);
>       }
> +     if (lock)
> +             KERNEL_LOCK();
>       error = (*callp->sy_call)(p, argp, retval);
>       if (lock)
>               KERNEL_UNLOCK();
> Index: sys/syscallargs.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/syscallargs.h,v
> retrieving revision 1.238
> diff -u -p -r1.238 syscallargs.h
> --- sys/syscallargs.h 27 Jun 2022 14:26:06 -0000      1.238
> +++ sys/syscallargs.h 28 Jun 2022 15:18:25 -0000
> @@ -1,10 +1,10 @@
> -/*   $OpenBSD: syscallargs.h,v 1.238 2022/06/27 14:26:06 cheloha Exp $       
> */
> +/*   $OpenBSD$       */
>  
>  /*
>   * System call argument lists.
>   *
>   * DO NOT EDIT-- this file is automatically generated.
> - * created from;     OpenBSD: syscalls.master,v 1.224 2022/05/16 07:36:04 
> mvs Exp 
> + * created from;     OpenBSD: syscalls.master,v 1.225 2022/06/27 14:26:05 
> cheloha Exp 
>   */
>  
>  #ifdef       syscallarg
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE

Reply via email to