Diff below unlocks 12 network syscalls.  It relies on the recent work
that has been done to make 'struct file' refcounting mpsafe and on
grabbing the corresponding socket lock before messing with the content
of any socket.

Why can we unlock the following syscalls?  Here's an explanation.

- sys_recvmsg() and sys_recvfrom() are wrappers around recvit().  They
  hold a socket reference then call soreceive() which will grab the
  corresponding socket lock.  For STREAM & DGRAM sockets this is the
  NET_LOCK(); for the rest it is the KERNEL_LOCK().

- sys_accept() and sys_accept4() are wrappers around doaccept().  They
  hold a socket reference, allocate a 'struct file' then block for a
  corresponding connect(2).  Once awakened they fully populate `fp' and
  place it in the global data structure.  The race with dup2(2), while
  sleeping, has been previously fixed:
    https://marc.info/?l=openbsd-cvs&m=152749772009924&w=2
    https://marc.info/?l=openbsd-cvs&m=152996258723362&w=2
 
- sys_getpeername() and sys_getsockname() hold a socket reference, grab
  the socket lock then go down into PRU_PEERADDR and PRU_SOCKADDR
  respectively.  Reading PCB fields is serialized by the corresponding
  lock.

- sys_connect() holds a socket reference, grabs the socket lock and
  establishes the connection under this lock.

- sys_bind(), sys_listen() and sys_shutdown() do the same but
  respectively call sobind(), solisten() and soshutdown() on the socket.
  solisten() reads global int-size variables that are only modified
  with the KERNEL_LOCK() held.  So I'm also grabbing it here to indicate
  that this needs more love.

- sys_setsockopt() and sys_getsockopt() hold a socket reference, grab
  the socket lock then call sosetopt() and sogetopt() respectively.
  Reading/writing PCB fields is fine with the corresponding lock.
  There's however one global which is created in sosplice(): even if
  the NET_LOCK() is enough to prevent races, creating a thread still
  requires the KERNEL_LOCK().  So I'm grabbing it here.

Please do not forget to do "cd /sys/kern && make syscalls" after
applying this diff.

Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.184
diff -u -p -r1.184 syscalls.master
--- kern/syscalls.master        27 Jun 2018 16:38:23 -0000      1.184
+++ kern/syscalls.master        5 Jul 2018 09:35:15 -0000
@@ -88,18 +88,18 @@
 #else
 26     UNIMPL          ptrace
 #endif
-27     STD             { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
+27     STD NOLOCK      { ssize_t sys_recvmsg(int s, struct msghdr *msg, \
                            int flags); }
 28     STD NOLOCK      { ssize_t sys_sendmsg(int s, \
                            const struct msghdr *msg, int flags); }
-29     STD             { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
+29     STD NOLOCK      { ssize_t sys_recvfrom(int s, void *buf, size_t len, \
                            int flags, struct sockaddr *from, \
                            socklen_t *fromlenaddr); }
-30     STD             { int sys_accept(int s, struct sockaddr *name, \
+30     STD NOLOCK      { int sys_accept(int s, struct sockaddr *name, \
                            socklen_t *anamelen); }
-31     STD             { int sys_getpeername(int fdes, struct sockaddr *asa, \
+31     STD NOLOCK      { int sys_getpeername(int fdes, struct sockaddr *asa, \
                            socklen_t *alen); }
-32     STD             { int sys_getsockname(int fdes, struct sockaddr *asa, \
+32     STD NOLOCK      { int sys_getsockname(int fdes, struct sockaddr *asa, \
                            socklen_t *alen); }
 33     STD             { int sys_access(const char *path, int amode); }
 34     STD             { int sys_chflags(const char *path, u_int flags); }
@@ -205,7 +205,7 @@
 91     STD             { int sys_nanosleep(const struct timespec *rqtp, \
                            struct timespec *rmtp); }
 92     STD             { int sys_fcntl(int fd, int cmd, ... void *arg); }
-93     STD             { int sys_accept4(int s, struct sockaddr *name, \
+93     STD NOLOCK      { int sys_accept4(int s, struct sockaddr *name, \
                            socklen_t *anamelen, int flags); }
 94     STD             { int sys___thrsleep(const volatile void *ident, \
                            clockid_t clock_id, const struct timespec *tp, \
@@ -213,18 +213,18 @@
 95     STD             { int sys_fsync(int fd); }
 96     STD             { int sys_setpriority(int which, id_t who, int prio); }
 97     STD NOLOCK      { int sys_socket(int domain, int type, int protocol); }
-98     STD             { int sys_connect(int s, const struct sockaddr *name, \
+98     STD NOLOCK      { int sys_connect(int s, const struct sockaddr *name, \
                            socklen_t namelen); }
 99     STD             { int sys_getdents(int fd, void *buf, size_t buflen); }
 100    STD             { int sys_getpriority(int which, id_t who); }
 101    STD             { int sys_pipe2(int *fdp, int flags); }
 102    STD             { int sys_dup3(int from, int to, int flags); }
 103    STD             { int sys_sigreturn(struct sigcontext *sigcntxp); }
-104    STD             { int sys_bind(int s, const struct sockaddr *name, \
+104    STD NOLOCK      { int sys_bind(int s, const struct sockaddr *name, \
                            socklen_t namelen); }
-105    STD             { int sys_setsockopt(int s, int level, int name, \
+105    STD NOLOCK      { int sys_setsockopt(int s, int level, int name, \
                            const void *val, socklen_t valsize); }
-106    STD             { int sys_listen(int s, int backlog); }
+106    STD NOLOCK      { int sys_listen(int s, int backlog); }
 107    STD             { int sys_chflagsat(int fd, const char *path, \
                            u_int flags, int atflags); }
 108    STD             { int sys_pledge(const char *promises, \
@@ -243,7 +243,7 @@
 115    OBSOL           vtrace
 116    OBSOL           t32_gettimeofday
 117    OBSOL           t32_getrusage
-118    STD             { int sys_getsockopt(int s, int level, int name, \
+118    STD NOLOCK      { int sys_getsockopt(int s, int level, int name, \
                            void *val, socklen_t *avalsize); }
 119    STD             { int sys_thrkill(pid_t tid, int signum, void *tcb); }
 120    STD             { ssize_t sys_readv(int fd, \
@@ -264,7 +264,7 @@
 133    STD NOLOCK      { ssize_t sys_sendto(int s, const void *buf, \
                            size_t len, int flags, const struct sockaddr *to, \
                            socklen_t tolen); }
-134    STD             { int sys_shutdown(int s, int how); }
+134    STD NOLOCK      { int sys_shutdown(int s, int how); }
 135    STD NOLOCK      { int sys_socketpair(int domain, int type, \
                            int protocol, int *rsv); }
 136    STD             { int sys_mkdir(const char *path, mode_t mode); }
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.224
diff -u -p -r1.224 uipc_socket.c
--- kern/uipc_socket.c  14 Jun 2018 08:46:09 -0000      1.224
+++ kern/uipc_socket.c  5 Jul 2018 10:02:09 -0000
@@ -182,10 +182,12 @@ solisten(struct socket *so, int backlog)
        }
        if (TAILQ_FIRST(&so->so_q) == NULL)
                so->so_options |= SO_ACCEPTCONN;
+       KERNEL_LOCK();
        if (backlog < 0 || backlog > somaxconn)
                backlog = somaxconn;
        if (backlog < sominconn)
                backlog = sominconn;
+       KERNEL_UNLOCK();
        so->so_qlimit = backlog;
        sounlock(so, s);
        return (0);
@@ -1092,9 +1094,12 @@ sosplice(struct socket *so, int fd, off_
 
        soassertlocked(so);
 
-       if (sosplice_taskq == NULL)
+       if (sosplice_taskq == NULL) {
+               KERNEL_LOCK();
                sosplice_taskq = taskq_create("sosplice", 1, IPL_SOFTNET, 
                    TASKQ_MPSAFE);
+               KERNEL_UNLOCK();
+       }
        if (sosplice_taskq == NULL)
                return (ENOMEM);
 

Reply via email to