On Thu, Nov 11, 2021 at 12:42:36AM +0300, Vitaliy Makkoveev wrote:
> On Wed, Nov 10, 2021 at 10:03:48PM +0100, Alexander Bluhm wrote:
> > On Tue, Oct 26, 2021 at 02:12:36PM +0300, Vitaliy Makkoveev wrote:
> > > --- sys/kern/uipc_socket.c        14 Oct 2021 23:05:10 -0000      1.265
> > > +++ sys/kern/uipc_socket.c        26 Oct 2021 11:05:59 -0000
> > > @@ -315,6 +315,8 @@ soclose(struct socket *so, int flags)
> > >   /* Revoke async IO early. There is a final revocation in sofree(). */
> > >   sigio_free(&so->so_sigio);
> > >   if (so->so_options & SO_ACCEPTCONN) {
> > > +         so->so_options &= ~SO_ACCEPTCONN;
> > > +
> > >           while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
> > >                   (void) soqremque(so2, 0);
> > >                   (void) soabort(so2);
> > 
> > I don't like this.  A listening socket can never be converted to a
> > non-listening socket.  Whatever this transition means in th TCP
> > layer.  If you want to mark a listening socket as closing to avoid
> > accepting connections, use a approriate flag.  Do not convert it
> > to an non-listening socket that may have races elsewhere.
> > 
> > I would say some so_state bits may be suitable if we really need
> > this feature.
> > 
> 
> This is temporary solution. I want to reorder soclose() to destroy PCB
> before `so_q0' and `so_q' cleanup, so this 'SO_ACCEPTCONN' modification
> will be removed. This is required because the relock dances will
> appeared in socose() and unp_detach() for the per-socket locking.
> 
> We don't release netlock in the inet sockets case, so they are not
> affected.
> 
> What about 'SO_DYING' flag? Anyway I want to remove it with the next
> diff.
> 
> > > - if (unp->unp_vnode) {
> > > +
> > > + if (vp) {
> > 
> > Please use (vp != NULL) for consistency.
> > 
> 
> No problem, but I made "if (vp)" without NULL for the consistency with
> "if (unp->unp_vnode)" in this function. Should they have the same
> notation?
> 
> > > +         unp->unp_vnode = NULL;
> > > +
> > >           /*
> > > -          * `v_socket' is only read in unp_connect and
> > > -          * unplock prevents concurrent access.
> > > +          * Enforce `i_lock' -> `unp_lock' because fifo
> > > +          * subsystem requires it.
> > >            */
> > >  
> > > -         unp->unp_vnode->v_socket = NULL;
> > > -         vp = unp->unp_vnode;
> > > -         unp->unp_vnode = NULL;
> > > +         sounlock(so, SL_LOCKED);
> > > +
> > > +         VOP_LOCK(vp, LK_EXCLUSIVE);
> > > +         vp->v_socket = NULL;
> > > +
> > > +         KERNEL_LOCK();
> > > +         vput(vp);
> > > +         KERNEL_UNLOCK();
> > > +
> > > +         solock(so);
> > >   }
> > 
> > This might work, we should try it.
> > 
> > > @@ -500,21 +510,6 @@ unp_detach(struct unpcb *unp)
> > >   pool_put(&unpcb_pool, unp);
> > >   if (unp_rights)
> > >           task_add(systqmp, &unp_gc_task);
> > > -
> > > - if (vp != NULL) {
> > > -         /*
> > > -          * Enforce `i_lock' -> `unplock' because fifo subsystem
> > > -          * requires it. The socket can't be closed concurrently
> > > -          * because the file descriptor reference is
> > > -          * still hold.
> > > -          */
> > > -
> > > -         sounlock(so, SL_LOCKED);
> > > -         KERNEL_LOCK();
> > > -         vrele(vp);
> > > -         KERNEL_UNLOCK();
> > > -         solock(so);
> > > - }
> > >  }
> > 
> > Why did you move the lock dance to the beginning of the function?
> > Does it matter?
> > 
> 
> Yes, I want to disconnect dying socket from the file system before start
> it's destruction. This does matter because in some cases we need to
> release the solock() before acquire solock() for two sockets to keep
> lock order between them. With listening socket disconnected from the
> file system we can't have concurrent threads performing connect to dying
> socket.
> 
> Also this is the reason I want to reorder listening socket destruction
> in the soclose().
> 
> OK for updated diff?
> 

Can I propose to commit this diff before? It reorders soclose() to
destroy PCB before `so_q0' and `so_q' cleanup.

The dying socket is already unlinked from the file descriptor layer, but
still accessible from the stack or from the file system layer. When we
release solock() while processing `so_q0' or `so_q' queues or when
performing (*pr_detach)(), concurrent thread could perform connection.
This reorder prevents this. The reorder will be required for the
upcoming fine grained locking diffs.

According the current code we assume the `so_q0' or `so_q' queues
cleanup could be done with destroyed PCB so this diff doesn't modify
existing logic. With this diff committed I don't need to modify
`so_state' in the unp_detach() reorder diff which started this thread.

Also the tool to test the diff:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdio.h>
#include <stdlib.h>
#include <err.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>

#include <arpa/inet.h>
#include <netinet/in.h>

#define NTHR (10)
#define NCONNECT (10)
#define NACCEPT ((NTHR * NCONNECT) / 2)

static void *
thr_conn(void *arg)
{
        struct sockaddr_storage *ss = arg;
        int s[NCONNECT];
        size_t hd = 0, tl = 0;

        while (1) {
                if ((s[hd] = socket(ss->ss_family, SOCK_STREAM, 0)) < 0)
                        continue;

                if (connect(s[hd], (struct sockaddr *)ss, ss->ss_len) < 0) {
                        close(s[hd]);
                        continue;
                }

                if ((hd = (hd + 1) % NCONNECT) == tl) {
                        close(s[tl]);
                        tl = (tl + 1) % NCONNECT;
                }
        }

        return NULL;
}

static void
usage(void)
{
        extern char *__progname;

        fprintf(stderr, "Usage %s: unix | inet | inet6\n", __progname);
        exit(1);
}

int
main(int argc, char *argv[])
{
        struct sockaddr_storage ss;
        size_t i;
        int s;

        if (argc != 2) {
                usage();
        }

        memset(&ss, 0, sizeof(ss));

        if (strcmp(argv[1], "unix") == 0) {
                struct sockaddr_un *sun;

                sun = (struct sockaddr_un *)&ss;
                sun->sun_len = sizeof(*sun);
                sun->sun_family = AF_UNIX;
                snprintf(sun->sun_path, sizeof(sun->sun_path) - 1,
                        "/tmp/socket%d", getpid());
        } else if (strcmp(argv[1], "inet") == 0) {
                struct sockaddr_in *sin;

                sin = (struct sockaddr_in *)&ss;
                sin->sin_len = sizeof(*sin);
                sin->sin_family = AF_INET;
                sin->sin_port = htons(10000+(getpid()%10000));
                if (inet_pton(AF_INET, "127.0.0.1", &sin->sin_addr) <= 0)
                        errx(1, "inet_pton()");
        } else if (strcmp(argv[1], "inet6") == 0) {
                struct sockaddr_in6 *sin6;

                sin6 = (struct sockaddr_in6 *)&ss;
                sin6->sin6_len = sizeof(*sin6);
                sin6->sin6_family = AF_INET6;
                sin6->sin6_port = htons(10000+(getpid()%10000));
                if (inet_pton(AF_INET6, "::1", &sin6->sin6_addr) <= 0)
                        errx(1, "inet_pton()");
        } else
                usage();

        for (i = 0; i < NTHR; ++i) {
                pthread_t thr;
                int error;

                if ((error = pthread_create(&thr, NULL, thr_conn, &ss)))
                        errc(1, error, "pthread_create()");
        }

        while(1) {
                int conns[NACCEPT];

                if ((s = socket(ss.ss_family, SOCK_STREAM, 0)) < 0)
                        err(1, "socket()");

                if (ss.ss_family == AF_UNIX) {
                        struct sockaddr_un *sun = (struct sockaddr_un *)&ss;
                        unlink(sun->sun_path);
                } else {
                        int opt = 1;

                        if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
                            &opt, sizeof(opt)) < 0)
                                err(1, "setsockopt()");
                }

                if (bind(s, (struct sockaddr *)&ss, ss.ss_len) < 0)
                        err(1, "bind()");

                if (listen(s, 10) < 0)
                        err(1, "listen()");

                for(i=0; i < NACCEPT; ++i) {
                        if ((conns[i] = accept(s, NULL, NULL)) < 0)
                                err(1, "accept()");
                }

                close(s);

                for(i=0; i < NACCEPT; ++i) {
                        close(conns[i]);
                }
        }

        return 0;
}

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.267
diff -u -p -r1.267 uipc_socket.c
--- sys/kern/uipc_socket.c      24 Oct 2021 07:02:47 -0000      1.267
+++ sys/kern/uipc_socket.c      11 Nov 2021 08:56:17 -0000
@@ -322,19 +322,9 @@ soclose(struct socket *so, int flags)
        s = solock(so);
        /* Revoke async IO early. There is a final revocation in sofree(). */
        sigio_free(&so->so_sigio);
-       if (so->so_options & SO_ACCEPTCONN) {
-               while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
-                       (void) soqremque(so2, 0);
-                       (void) soabort(so2);
-               }
-               while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
-                       (void) soqremque(so2, 1);
-                       (void) soabort(so2);
-               }
-       }
-       if (so->so_pcb == NULL)
-               goto discard;
        if (so->so_state & SS_ISCONNECTED) {
+               if (so->so_pcb == NULL)
+                       goto discard;
                if ((so->so_state & SS_ISDISCONNECTING) == 0) {
                        error = sodisconnect(so);
                        if (error)
@@ -360,6 +350,16 @@ drop:
                error2 = (*so->so_proto->pr_detach)(so);
                if (error == 0)
                        error = error2;
+       }
+       if (so->so_options & SO_ACCEPTCONN) {
+               while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
+                       (void) soqremque(so2, 0);
+                       (void) soabort(so2);
+               }
+               while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
+                       (void) soqremque(so2, 1);
+                       (void) soabort(so2);
+               }
        }
 discard:
        if (so->so_state & SS_NOFDREF)

Reply via email to