On Tue, Jun 16, 2020 at 09:10:54PM +0200, Anton Lindqvist wrote:
> Hi,
> Instead of performing three distinct allocations per created pipe,
> reduce it to a single one. Not only should this be more performant, it
> also solves a kqueue related issue found by visa@ who also requested
> this change:
> 
> > If you attach an EVFILT_WRITE filter to a pipe fd, the knote gets added
> > to the peer's klist. This is a problem for kqueue  because if you close
> > the peer's fd, the knote is left in the list whose head is about to be
> > freed. knote_fdclose() is not able to clear the knote because it is not
> > registered with the peer's fd.
> 
> FreeBSD also takes a similar approach to pipe allocations.
> 
> Comments? OK?

This was backed out since it contained bug. Updated diff below in which
pipe_buffer_free() is called while holding the pipe lock in
pipe_destroy(). This prevents another thread from freeing the pipe_pair
structure while another thread is sleeping inside pipe_buffer_free().

Note that pipe_buffer_free() is also called from pipe_buffer_realloc().
The pipe is then either being created, i.e. we have exclusive access at
this point or the pipe lock is already acquired.

Comments? OK?

Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.122
diff -u -p -r1.122 sys_pipe.c
--- kern/sys_pipe.c     19 Jun 2020 02:08:48 -0000      1.122
+++ kern/sys_pipe.c     22 Jun 2020 19:23:56 -0000
@@ -49,6 +49,12 @@
 
 #include <sys/pipe.h>
 
+struct pipe_pair {
+       struct pipe pp_wpipe;
+       struct pipe pp_rpipe;
+       struct rwlock pp_lock;
+};
+
 /*
  * interfaces to the outside world
  */
@@ -103,13 +109,12 @@ const struct filterops pipe_wfiltops = {
 unsigned int nbigpipe;
 static unsigned int amountpipekva;
 
-struct pool pipe_pool;
-struct pool pipe_lock_pool;
+struct pool pipe_pair_pool;
 
 int    dopipe(struct proc *, int *, int);
 void   pipeselwakeup(struct pipe *);
 
-struct pipe *pipe_create(void);
+int    pipe_create(struct pipe *);
 void   pipe_destroy(struct pipe *);
 int    pipe_rundown(struct pipe *);
 struct pipe *pipe_peer(struct pipe *);
@@ -120,6 +125,8 @@ int pipe_iolock(struct pipe *);
 void   pipe_iounlock(struct pipe *);
 int    pipe_iosleep(struct pipe *, const char *);
 
+struct pipe_pair *pipe_pair_create(void);
+
 /*
  * The pipe system call for the DTYPE_PIPE type of pipes
  */
@@ -153,33 +160,17 @@ dopipe(struct proc *p, int *ufds, int fl
 {
        struct filedesc *fdp = p->p_fd;
        struct file *rf, *wf;
+       struct pipe_pair *pp;
        struct pipe *rpipe, *wpipe = NULL;
-       struct rwlock *lock;
        int fds[2], cloexec, error;
 
        cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;
 
-       if ((rpipe = pipe_create()) == NULL) {
-               error = ENOMEM;
-               goto free1;
-       }
-
-       /*
-        * One lock is used per pipe pair in order to obtain exclusive access to
-        * the pipe pair.
-        */
-       lock = pool_get(&pipe_lock_pool, PR_WAITOK);
-       rw_init(lock, "pipelk");
-       rpipe->pipe_lock = lock;
-
-       if ((wpipe = pipe_create()) == NULL) {
-               error = ENOMEM;
-               goto free1;
-       }
-       wpipe->pipe_lock = lock;
-
-       rpipe->pipe_peer = wpipe;
-       wpipe->pipe_peer = rpipe;
+       pp = pipe_pair_create();
+       if (pp == NULL)
+               return (ENOMEM);
+       wpipe = &pp->pp_wpipe;
+       rpipe = &pp->pp_rpipe;
 
        fdplock(fdp);
 
@@ -226,7 +217,6 @@ free3:
        rpipe = NULL;
 free2:
        fdpunlock(fdp);
-free1:
        pipe_destroy(wpipe);
        pipe_destroy(rpipe);
        return (error);
@@ -272,19 +262,14 @@ pipe_buffer_realloc(struct pipe *cpipe, 
 /*
  * initialize and allocate VM and memory for pipe
  */
-struct pipe *
-pipe_create(void)
+int
+pipe_create(struct pipe *cpipe)
 {
-       struct pipe *cpipe;
        int error;
 
-       cpipe = pool_get(&pipe_pool, PR_WAITOK | PR_ZERO);
-
        error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
-       if (error != 0) {
-               pool_put(&pipe_pool, cpipe);
-               return (NULL);
-       }
+       if (error != 0)
+               return (error);
 
        sigio_init(&cpipe->pipe_sigio);
 
@@ -292,7 +277,7 @@ pipe_create(void)
        cpipe->pipe_atime = cpipe->pipe_ctime;
        cpipe->pipe_mtime = cpipe->pipe_ctime;
 
-       return (cpipe);
+       return (0);
 }
 
 struct pipe *
@@ -834,7 +819,6 @@ void
 pipe_destroy(struct pipe *cpipe)
 {
        struct pipe *ppipe;
-       struct rwlock *lock = NULL;
 
        if (cpipe == NULL)
                return;
@@ -862,20 +846,14 @@ pipe_destroy(struct pipe *cpipe)
                ppipe->pipe_state |= PIPE_EOF;
                wakeup(ppipe);
                ppipe->pipe_peer = NULL;
-       } else {
-               /*
-                * Peer already gone. This is last reference to the pipe lock
-                * and it must therefore be freed below.
-                */
-               lock = cpipe->pipe_lock;
        }
 
+       pipe_buffer_free(cpipe);
+
        rw_exit_write(cpipe->pipe_lock);
 
-       pipe_buffer_free(cpipe);
-       if (lock != NULL)
-               pool_put(&pipe_lock_pool, lock);
-       pool_put(&pipe_pool, cpipe);
+       if (ppipe == NULL)
+               pool_put(&pipe_pair_pool, cpipe->pipe_pair);
 }
 
 /*
@@ -1008,8 +986,33 @@ filt_pipewrite(struct knote *kn, long hi
 void
 pipe_init(void)
 {
-       pool_init(&pipe_pool, sizeof(struct pipe), 0, IPL_MPFLOOR, PR_WAITOK,
-           "pipepl", NULL);
-       pool_init(&pipe_lock_pool, sizeof(struct rwlock), 0, IPL_MPFLOOR,
-           PR_WAITOK, "pipelkpl", NULL);
+       pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
+           PR_WAITOK, "pipepl", NULL);
+}
+
+struct pipe_pair *
+pipe_pair_create(void)
+{
+       struct pipe_pair *pp;
+
+       pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
+       pp->pp_wpipe.pipe_pair = pp;
+       pp->pp_rpipe.pipe_pair = pp;
+       pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
+       pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
+       /*
+        * One lock is used per pipe pair in order to obtain exclusive access to
+        * the pipe pair.
+        */
+       rw_init(&pp->pp_lock, "pipelk");
+       pp->pp_wpipe.pipe_lock = &pp->pp_lock;
+       pp->pp_rpipe.pipe_lock = &pp->pp_lock;
+
+       if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
+               goto err;
+       return (pp);
+err:
+       pipe_destroy(&pp->pp_wpipe);
+       pipe_destroy(&pp->pp_rpipe);
+       return (NULL);
 }
Index: sys/pipe.h
===================================================================
RCS file: /cvs/src/sys/sys/pipe.h,v
retrieving revision 1.26
diff -u -p -r1.26 pipe.h
--- sys/pipe.h  19 Jun 2020 02:08:48 -0000      1.26
+++ sys/pipe.h  22 Jun 2020 19:23:56 -0000
@@ -67,6 +67,8 @@ struct pipebuf {
 #define PIPE_LOCK      0x100   /* Thread has exclusive I/O access. */
 #define PIPE_LWANT     0x200   /* Thread wants exclusive I/O access. */
 
+struct pipe_pair;
+
 /*
  * Per-pipe data structure.
  * Two of these are linked together to produce bi-directional pipes.
@@ -85,6 +87,7 @@ struct pipe {
        struct  timespec pipe_ctime;    /* [I] time of status change */
        struct  sigio_ref pipe_sigio;   /* [S] async I/O registration */
        struct  pipe *pipe_peer;        /* [p] link with other direction */
+       struct  pipe_pair *pipe_pair;   /* [I] pipe storage */
        u_int   pipe_state;             /* [p] pipe status info */
        int     pipe_busy;              /* [p] # readers/writers */
 };

Reply via email to