> Date: Tue, 31 Jul 2018 14:55:45 -0300
> From: Martin Pieuchot <m...@openbsd.org>
> 
> On 26/06/18(Tue) 12:31, Martin Pieuchot wrote:
> > These syscalls do two operations: they allocate FDs and some buffers.
> > Thanks to the work done for sockets the file-related bits are now
> > mp-safe, so we can push the KERNEL_LOCK() down.
> > 
> > Diff below adds some KERNEL_LOCK/UNLOCK() dances around km_alloc(9) and
> > km_free(9).  It also makes some counters use atomic operations.
> > 
> > My goals for this diff are to prepare the terrain to unlock read(2) &
> > write(2) for pipes and push the KERNEL_LOCK() at UVM borders, to
> > hopefully motivate other hackers to push further down!
> 
> The only objection that I received to this diff is that `fd_ofilesflags'
> wasn't always protected.  Which meant that fdexpand() couldn't be taken
> out of the KERNEL_LOCK().  This has been fixed since then.  So any other
> comment or ok?
> 
> Index: kern/sys_pipe.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_pipe.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 sys_pipe.c
> --- kern/sys_pipe.c   18 Jun 2018 09:15:05 -0000      1.81
> +++ kern/sys_pipe.c   26 Jun 2018 10:27:51 -0000
> @@ -91,8 +91,8 @@ struct filterops pipe_wfiltops =
>   * Limit the number of "big" pipes
>   */
>  #define LIMITBIGPIPES        32
> -int nbigpipe;
> -static int amountpipekva;
> +unsigned int nbigpipe;
> +static unsigned int amountpipekva;
>  
>  struct pool pipe_pool;
>  
> @@ -214,7 +214,9 @@ pipespace(struct pipe *cpipe, u_int size
>  {
>       caddr_t buffer;
>  
> +     KERNEL_LOCK();
>       buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
> +     KERNEL_UNLOCK();
>       if (buffer == NULL) {
>               return (ENOMEM);
>       }
> @@ -227,7 +229,7 @@ pipespace(struct pipe *cpipe, u_int size
>       cpipe->pipe_buffer.out = 0;
>       cpipe->pipe_buffer.cnt = 0;
>  
> -     amountpipekva += cpipe->pipe_buffer.size;
> +     atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
>  
>       return (0);
>  }
> @@ -443,14 +445,13 @@ pipe_write(struct file *fp, off_t *poff,
>        * If it is advantageous to resize the pipe buffer, do
>        * so.
>        */
> -     if ((uio->uio_resid > PIPE_SIZE) &&
> -         (nbigpipe < LIMITBIGPIPES) &&
> +     if ((uio->uio_resid > PIPE_SIZE) && (nbigpipe < LIMITBIGPIPES) &&
>           (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
>           (wpipe->pipe_buffer.cnt == 0)) {
>  
>               if ((error = pipelock(wpipe)) == 0) {
>                       if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
> -                             nbigpipe++;
> +                             atomic_inc_int(&nbigpipe);

There is a TOCTTOU issue here.  Threads can run through here in
parallel and nbigpipe can become larger than LIMITBIGPIPES.  You need
to use atomic_inc_int_nv() and back off if you pushed it over the
limit.

>                       pipeunlock(wpipe);
>               }
>       }
> @@ -753,12 +754,15 @@ pipe_close(struct file *fp, struct proc 
>  void
>  pipe_free_kmem(struct pipe *cpipe)
>  {
> +     u_int size = cpipe->pipe_buffer.size;
> +
>       if (cpipe->pipe_buffer.buffer != NULL) {
> -             if (cpipe->pipe_buffer.size > PIPE_SIZE)
> -                     --nbigpipe;
> -             amountpipekva -= cpipe->pipe_buffer.size;
> -             km_free(cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size,
> -                 &kv_any, &kp_pageable);
> +             if (size > PIPE_SIZE)
> +                     atomic_dec_int(&nbigpipe);
> +             atomic_sub_int(&amountpipekva, size);
> +             KERNEL_LOCK();
> +             km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
> +             KERNEL_UNLOCK();
>               cpipe->pipe_buffer.buffer = NULL;
>       }
>  }
> @@ -771,7 +775,6 @@ pipeclose(struct pipe *cpipe)
>  {
>       struct pipe *ppipe;
>       if (cpipe) {
> -             
>               pipeselwakeup(cpipe);
>  
>               /*
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.182
> diff -u -p -r1.182 syscalls.master
> --- kern/syscalls.master      20 Jun 2018 10:52:49 -0000      1.182
> +++ kern/syscalls.master      26 Jun 2018 09:56:44 -0000
> @@ -217,7 +217,7 @@
>                           socklen_t namelen); }
>  99   STD             { int sys_getdents(int fd, void *buf, size_t buflen); }
>  100  STD             { int sys_getpriority(int which, id_t who); }
> -101  STD             { int sys_pipe2(int *fdp, int flags); }
> +101  STD NOLOCK      { int sys_pipe2(int *fdp, int flags); }
>  102  STD             { int sys_dup3(int from, int to, int flags); }
>  103  STD             { int sys_sigreturn(struct sigcontext *sigcntxp); }
>  104  STD             { int sys_bind(int s, const struct sockaddr *name, \
> @@ -447,7 +447,7 @@
>  260  UNIMPL
>  261  UNIMPL
>  262  UNIMPL
> -263  STD             { int sys_pipe(int *fdp); }
> +263  STD NOLOCK      { int sys_pipe(int *fdp); }
>  264  STD             { int sys_fhopen(const fhandle_t *fhp, int flags); }
>  265  UNIMPL
>  266  UNIMPL
> 
> 

Reply via email to