On Wed, Nov 10, 2021 at 10:03:48PM +0100, Alexander Bluhm wrote:
> On Tue, Oct 26, 2021 at 02:12:36PM +0300, Vitaliy Makkoveev wrote:
> > --- sys/kern/uipc_socket.c  14 Oct 2021 23:05:10 -0000      1.265
> > +++ sys/kern/uipc_socket.c  26 Oct 2021 11:05:59 -0000
> > @@ -315,6 +315,8 @@ soclose(struct socket *so, int flags)
> >     /* Revoke async IO early. There is a final revocation in sofree(). */
> >     sigio_free(&so->so_sigio);
> >     if (so->so_options & SO_ACCEPTCONN) {
> > +           so->so_options &= ~SO_ACCEPTCONN;
> > +
> >             while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
> >                     (void) soqremque(so2, 0);
> >                     (void) soabort(so2);
> 
> I don't like this.  A listening socket can never be converted to a
> non-listening socket.  Whatever this transition means in th TCP
> layer.  If you want to mark a listening socket as closing to avoid
> accepting connections, use a approriate flag.  Do not convert it
> to an non-listening socket that may have races elsewhere.
> 
> I would say some so_state bits may be suitable if we really need
> this feature.
> 

This is temporary solution. I want to reorder soclose() to destroy PCB
before `so_q0' and `so_q' cleanup, so this 'SO_ACCEPTCONN' modification
will be removed. This is required because the relock dances will
appeared in socose() and unp_detach() for the per-socket locking.

We don't release netlock in the inet sockets case, so they are not
affected.

What about 'SO_DYING' flag? Anyway I want to remove it with the next
diff.

> > -   if (unp->unp_vnode) {
> > +
> > +   if (vp) {
> 
> Please use (vp != NULL) for consistency.
> 

No problem, but I made "if (vp)" without NULL for the consistency with
"if (unp->unp_vnode)" in this function. Should they have the same
notation?

> > +           unp->unp_vnode = NULL;
> > +
> >             /*
> > -            * `v_socket' is only read in unp_connect and
> > -            * unplock prevents concurrent access.
> > +            * Enforce `i_lock' -> `unp_lock' because fifo
> > +            * subsystem requires it.
> >              */
> >  
> > -           unp->unp_vnode->v_socket = NULL;
> > -           vp = unp->unp_vnode;
> > -           unp->unp_vnode = NULL;
> > +           sounlock(so, SL_LOCKED);
> > +
> > +           VOP_LOCK(vp, LK_EXCLUSIVE);
> > +           vp->v_socket = NULL;
> > +
> > +           KERNEL_LOCK();
> > +           vput(vp);
> > +           KERNEL_UNLOCK();
> > +
> > +           solock(so);
> >     }
> 
> This might work, we should try it.
> 
> > @@ -500,21 +510,6 @@ unp_detach(struct unpcb *unp)
> >     pool_put(&unpcb_pool, unp);
> >     if (unp_rights)
> >             task_add(systqmp, &unp_gc_task);
> > -
> > -   if (vp != NULL) {
> > -           /*
> > -            * Enforce `i_lock' -> `unplock' because fifo subsystem
> > -            * requires it. The socket can't be closed concurrently
> > -            * because the file descriptor reference is
> > -            * still hold.
> > -            */
> > -
> > -           sounlock(so, SL_LOCKED);
> > -           KERNEL_LOCK();
> > -           vrele(vp);
> > -           KERNEL_UNLOCK();
> > -           solock(so);
> > -   }
> >  }
> 
> Why did you move the lock dance to the beginning of the function?
> Does it matter?
> 

Yes, I want to disconnect dying socket from the file system before start
it's destruction. This does matter because in some cases we need to
release the solock() before acquire solock() for two sockets to keep
lock order between them. With listening socket disconnected from the
file system we can't have concurrent threads performing connect to dying
socket.

Also this is the reason I want to reorder listening socket destruction
in the soclose().

OK for updated diff?

Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.268
diff -u -p -r1.268 uipc_socket.c
--- sys/kern/uipc_socket.c      6 Nov 2021 05:26:33 -0000       1.268
+++ sys/kern/uipc_socket.c      10 Nov 2021 21:33:01 -0000
@@ -334,6 +334,8 @@ soclose(struct socket *so, int flags)
        /* Revoke async IO early. There is a final revocation in sofree(). */
        sigio_free(&so->so_sigio);
        if (so->so_options & SO_ACCEPTCONN) {
+               so->so_options |= SO_DYING;
+
                while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
                        (void) soqremque(so2, 0);
                        (void) soabort(so2);
Index: sys/kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.154
diff -u -p -r1.154 uipc_usrreq.c
--- sys/kern/uipc_usrreq.c      6 Nov 2021 17:35:14 -0000       1.154
+++ sys/kern/uipc_usrreq.c      10 Nov 2021 21:33:01 -0000
@@ -485,20 +485,30 @@ void
 unp_detach(struct unpcb *unp)
 {
        struct socket *so = unp->unp_socket;
-       struct vnode *vp = NULL;
+       struct vnode *vp = unp->unp_vnode;
 
        rw_assert_wrlock(&unp_lock);
 
        LIST_REMOVE(unp, unp_link);
-       if (unp->unp_vnode) {
+
+       if (vp != NULL) {
+               unp->unp_vnode = NULL;
+
                /*
-                * `v_socket' is only read in unp_connect and
-                * unplock prevents concurrent access.
+                * Enforce `i_lock' -> `unp_lock' because fifo
+                * subsystem requires it.
                 */
 
-               unp->unp_vnode->v_socket = NULL;
-               vp = unp->unp_vnode;
-               unp->unp_vnode = NULL;
+               sounlock(so, SL_LOCKED);
+
+               VOP_LOCK(vp, LK_EXCLUSIVE);
+               vp->v_socket = NULL;
+
+               KERNEL_LOCK();
+               vput(vp);
+               KERNEL_UNLOCK();
+
+               solock(so);
        }
 
        if (unp->unp_conn)
@@ -511,21 +521,6 @@ unp_detach(struct unpcb *unp)
        pool_put(&unpcb_pool, unp);
        if (unp_rights)
                task_add(systqmp, &unp_gc_task);
-
-       if (vp != NULL) {
-               /*
-                * Enforce `i_lock' -> `unplock' because fifo subsystem
-                * requires it. The socket can't be closed concurrently
-                * because the file descriptor reference is
-                * still hold.
-                */
-
-               sounlock(so, SL_LOCKED);
-               KERNEL_LOCK();
-               vrele(vp);
-               KERNEL_UNLOCK();
-               solock(so);
-       }
 }
 
 int
@@ -670,7 +665,8 @@ unp_connect(struct socket *so, struct mb
                goto put_locked;
        }
        if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
-               if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
+               if ((so2->so_options & SO_DYING) != 0 ||
+                   (so2->so_options & SO_ACCEPTCONN) == 0 ||
                    (so3 = sonewconn(so2, 0)) == NULL) {
                        error = ECONNREFUSED;
                        goto put_locked;
Index: sys/sys/socket.h
===================================================================
RCS file: /cvs/src/sys/sys/socket.h,v
retrieving revision 1.101
diff -u -p -r1.101 socket.h
--- sys/sys/socket.h    7 Nov 2021 12:05:28 -0000       1.101
+++ sys/sys/socket.h    10 Nov 2021 21:33:01 -0000
@@ -97,6 +97,7 @@ typedef       __sa_family_t   sa_family_t;    /* so
 #define SO_TIMESTAMP   0x0800          /* timestamp received dgram traffic */
 #define SO_BINDANY     0x1000          /* allow bind to any address */
 #define SO_ZEROIZE     0x2000          /* zero out all mbufs sent over socket 
*/
+#define SO_DYING       0x4000          /* socket is dying */
 
 /*
  * Additional options, not kept in so_options.

Reply via email to