On Thu, Nov 11, 2021 at 12:42:36AM +0300, Vitaliy Makkoveev wrote:
> On Wed, Nov 10, 2021 at 10:03:48PM +0100, Alexander Bluhm wrote:
> > On Tue, Oct 26, 2021 at 02:12:36PM +0300, Vitaliy Makkoveev wrote:
> > > --- sys/kern/uipc_socket.c 14 Oct 2021 23:05:10 -0000 1.265
> > > +++ sys/kern/uipc_socket.c 26 Oct 2021 11:05:59 -0000
> > > @@ -315,6 +315,8 @@ soclose(struct socket *so, int flags)
> > > /* Revoke async IO early. There is a final revocation in sofree(). */
> > > sigio_free(&so->so_sigio);
> > > if (so->so_options & SO_ACCEPTCONN) {
> > > + so->so_options &= ~SO_ACCEPTCONN;
> > > +
> > > while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
> > > (void) soqremque(so2, 0);
> > > (void) soabort(so2);
> >
> > I don't like this. A listening socket can never be converted to a
> > non-listening socket. Whatever this transition means in th TCP
> > layer. If you want to mark a listening socket as closing to avoid
> > accepting connections, use a approriate flag. Do not convert it
> > to an non-listening socket that may have races elsewhere.
> >
> > I would say some so_state bits may be suitable if we really need
> > this feature.
> >
>
> This is temporary solution. I want to reorder soclose() to destroy PCB
> before `so_q0' and `so_q' cleanup, so this 'SO_ACCEPTCONN' modification
> will be removed. This is required because the relock dances will
> appeared in socose() and unp_detach() for the per-socket locking.
>
> We don't release netlock in the inet sockets case, so they are not
> affected.
>
> What about 'SO_DYING' flag? Anyway I want to remove it with the next
> diff.
>
> > > - if (unp->unp_vnode) {
> > > +
> > > + if (vp) {
> >
> > Please use (vp != NULL) for consistency.
> >
>
> No problem, but I made "if (vp)" without NULL for the consistency with
> "if (unp->unp_vnode)" in this function. Should they have the same
> notation?
>
> > > + unp->unp_vnode = NULL;
> > > +
> > > /*
> > > - * `v_socket' is only read in unp_connect and
> > > - * unplock prevents concurrent access.
> > > + * Enforce `i_lock' -> `unp_lock' because fifo
> > > + * subsystem requires it.
> > > */
> > >
> > > - unp->unp_vnode->v_socket = NULL;
> > > - vp = unp->unp_vnode;
> > > - unp->unp_vnode = NULL;
> > > + sounlock(so, SL_LOCKED);
> > > +
> > > + VOP_LOCK(vp, LK_EXCLUSIVE);
> > > + vp->v_socket = NULL;
> > > +
> > > + KERNEL_LOCK();
> > > + vput(vp);
> > > + KERNEL_UNLOCK();
> > > +
> > > + solock(so);
> > > }
> >
> > This might work, we should try it.
> >
> > > @@ -500,21 +510,6 @@ unp_detach(struct unpcb *unp)
> > > pool_put(&unpcb_pool, unp);
> > > if (unp_rights)
> > > task_add(systqmp, &unp_gc_task);
> > > -
> > > - if (vp != NULL) {
> > > - /*
> > > - * Enforce `i_lock' -> `unplock' because fifo subsystem
> > > - * requires it. The socket can't be closed concurrently
> > > - * because the file descriptor reference is
> > > - * still hold.
> > > - */
> > > -
> > > - sounlock(so, SL_LOCKED);
> > > - KERNEL_LOCK();
> > > - vrele(vp);
> > > - KERNEL_UNLOCK();
> > > - solock(so);
> > > - }
> > > }
> >
> > Why did you move the lock dance to the beginning of the function?
> > Does it matter?
> >
>
> Yes, I want to disconnect dying socket from the file system before start
> it's destruction. This does matter because in some cases we need to
> release the solock() before acquire solock() for two sockets to keep
> lock order between them. With listening socket disconnected from the
> file system we can't have concurrent threads performing connect to dying
> socket.
>
> Also this is the reason I want to reorder listening socket destruction
> in the soclose().
>
> OK for updated diff?
>
Can I propose to commit this diff before? It reorders soclose() to
destroy PCB before `so_q0' and `so_q' cleanup.
The dying socket is already unlinked from the file descriptor layer, but
still accessible from the stack or from the file system layer. When we
release solock() while processing `so_q0' or `so_q' queues or when
performing (*pr_detach)(), concurrent thread could perform connection.
This reorder prevents this. The reorder will be required for the
upcoming fine grained locking diffs.
According the current code we assume the `so_q0' or `so_q' queues
cleanup could be done with destroyed PCB so this diff doesn't modify
existing logic. With this diff committed I don't need to modify
`so_state' in the unp_detach() reorder diff which started this thread.
Also the tool to test the diff:
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdio.h>
#include <stdlib.h>
#include <err.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#define NTHR (10)
#define NCONNECT (10)
#define NACCEPT ((NTHR * NCONNECT) / 2)
static void *
thr_conn(void *arg)
{
struct sockaddr_storage *ss = arg;
int s[NCONNECT];
size_t hd = 0, tl = 0;
while (1) {
if ((s[hd] = socket(ss->ss_family, SOCK_STREAM, 0)) < 0)
continue;
if (connect(s[hd], (struct sockaddr *)ss, ss->ss_len) < 0) {
close(s[hd]);
continue;
}
if ((hd = (hd + 1) % NCONNECT) == tl) {
close(s[tl]);
tl = (tl + 1) % NCONNECT;
}
}
return NULL;
}
static void
usage(void)
{
extern char *__progname;
fprintf(stderr, "Usage %s: unix | inet | inet6\n", __progname);
exit(1);
}
int
main(int argc, char *argv[])
{
struct sockaddr_storage ss;
size_t i;
int s;
if (argc != 2) {
usage();
}
memset(&ss, 0, sizeof(ss));
if (strcmp(argv[1], "unix") == 0) {
struct sockaddr_un *sun;
sun = (struct sockaddr_un *)&ss;
sun->sun_len = sizeof(*sun);
sun->sun_family = AF_UNIX;
snprintf(sun->sun_path, sizeof(sun->sun_path) - 1,
"/tmp/socket%d", getpid());
} else if (strcmp(argv[1], "inet") == 0) {
struct sockaddr_in *sin;
sin = (struct sockaddr_in *)&ss;
sin->sin_len = sizeof(*sin);
sin->sin_family = AF_INET;
sin->sin_port = htons(10000+(getpid()%10000));
if (inet_pton(AF_INET, "127.0.0.1", &sin->sin_addr) <= 0)
errx(1, "inet_pton()");
} else if (strcmp(argv[1], "inet6") == 0) {
struct sockaddr_in6 *sin6;
sin6 = (struct sockaddr_in6 *)&ss;
sin6->sin6_len = sizeof(*sin6);
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(10000+(getpid()%10000));
if (inet_pton(AF_INET6, "::1", &sin6->sin6_addr) <= 0)
errx(1, "inet_pton()");
} else
usage();
for (i = 0; i < NTHR; ++i) {
pthread_t thr;
int error;
if ((error = pthread_create(&thr, NULL, thr_conn, &ss)))
errc(1, error, "pthread_create()");
}
while(1) {
int conns[NACCEPT];
if ((s = socket(ss.ss_family, SOCK_STREAM, 0)) < 0)
err(1, "socket()");
if (ss.ss_family == AF_UNIX) {
struct sockaddr_un *sun = (struct sockaddr_un *)&ss;
unlink(sun->sun_path);
} else {
int opt = 1;
if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR,
&opt, sizeof(opt)) < 0)
err(1, "setsockopt()");
}
if (bind(s, (struct sockaddr *)&ss, ss.ss_len) < 0)
err(1, "bind()");
if (listen(s, 10) < 0)
err(1, "listen()");
for(i=0; i < NACCEPT; ++i) {
if ((conns[i] = accept(s, NULL, NULL)) < 0)
err(1, "accept()");
}
close(s);
for(i=0; i < NACCEPT; ++i) {
close(conns[i]);
}
}
return 0;
}
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.267
diff -u -p -r1.267 uipc_socket.c
--- sys/kern/uipc_socket.c 24 Oct 2021 07:02:47 -0000 1.267
+++ sys/kern/uipc_socket.c 11 Nov 2021 08:56:17 -0000
@@ -322,19 +322,9 @@ soclose(struct socket *so, int flags)
s = solock(so);
/* Revoke async IO early. There is a final revocation in sofree(). */
sigio_free(&so->so_sigio);
- if (so->so_options & SO_ACCEPTCONN) {
- while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
- (void) soqremque(so2, 0);
- (void) soabort(so2);
- }
- while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
- (void) soqremque(so2, 1);
- (void) soabort(so2);
- }
- }
- if (so->so_pcb == NULL)
- goto discard;
if (so->so_state & SS_ISCONNECTED) {
+ if (so->so_pcb == NULL)
+ goto discard;
if ((so->so_state & SS_ISDISCONNECTING) == 0) {
error = sodisconnect(so);
if (error)
@@ -360,6 +350,16 @@ drop:
error2 = (*so->so_proto->pr_detach)(so);
if (error == 0)
error = error2;
+ }
+ if (so->so_options & SO_ACCEPTCONN) {
+ while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
+ (void) soqremque(so2, 0);
+ (void) soabort(so2);
+ }
+ while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
+ (void) soqremque(so2, 1);
+ (void) soabort(so2);
+ }
}
discard:
if (so->so_state & SS_NOFDREF)