When a per-protocol attach function is called, the given socket is not
yet reachable, so there's no need to lock it.  So the diff below removes
the solock/sounlock dance and shows where the NET_LOCK() is required to
protect some specific global data structures.

I reordered the different blocks in all pr_attach functions for
consistency, but also to be able to relax the assertions in soreserve()
& friends.

The interesting bits are in tcp_attach() since that's the only function
which is also called from the packet processing path.

Comments?  Oks?

Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.226
diff -u -p -r1.226 uipc_socket.c
--- kern/uipc_socket.c  30 Jul 2018 12:22:14 -0000      1.226
+++ kern/uipc_socket.c  30 Jul 2018 12:46:34 -0000
@@ -141,15 +141,14 @@ socreate(int dom, struct socket **aso, i
        so->so_cpid = p->p_p->ps_pid;
        so->so_proto = prp;
 
-       s = solock(so);
        error = (*prp->pr_attach)(so, proto);
        if (error) {
+               s = solock(so);
                so->so_state |= SS_NOFDREF;
                /* sofree() calls sounlock(). */
                sofree(so, s);
                return (error);
        }
-       sounlock(so, s);
        *aso = so;
        return (0);
 }
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.96
diff -u -p -r1.96 uipc_socket2.c
--- kern/uipc_socket2.c 10 Jul 2018 10:02:14 -0000      1.96
+++ kern/uipc_socket2.c 30 Jul 2018 12:46:34 -0000
@@ -96,7 +96,9 @@ soisconnected(struct socket *so)
 {
        struct socket *head = so->so_head;
 
-       soassertlocked(so);
+       if ((so->so_pcb != NULL) || head != NULL)
+               soassertlocked(so);
+
        so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
        so->so_state |= SS_ISCONNECTED;
        if (head && soqremque(so, 0)) {
@@ -148,8 +150,7 @@ sonewconn(struct socket *head, int conns
 
        /*
         * XXXSMP as long as `so' and `head' share the same lock, we
-        * can call soreserve() and pr_attach() below w/o expliclitly
-        * locking `so'.
+        * can call soqinsque() below w/o explicitly locking `so'.
         */
        soassertlocked(head);
 
@@ -189,12 +190,11 @@ sonewconn(struct socket *head, int conns
        so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
        so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
 
-       soqinsque(head, so, soqueue);
        if ((*so->so_proto->pr_attach)(so, 0)) {
-               (void) soqremque(so, soqueue);
                pool_put(&socket_pool, so);
                return (NULL);
        }
+       soqinsque(head, so, soqueue);
        if (connstatus) {
                sorwakeup(head);
                wakeup(&head->so_timeo);
@@ -448,7 +448,8 @@ sowakeup(struct socket *so, struct sockb
 int
 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
-       soassertlocked(so);
+       if (so->so_pcb != NULL)
+               soassertlocked(so);
 
        if (sbreserve(so, &so->so_snd, sndcc))
                goto bad;
@@ -478,7 +479,8 @@ int
 sbreserve(struct socket *so, struct sockbuf *sb, u_long cc)
 {
        KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
-       soassertlocked(so);
+       if (so->so_pcb != NULL)
+               soassertlocked(so);
 
        if (cc == 0 || cc > sb_max)
                return (1);
@@ -948,7 +950,8 @@ sbdrop(struct socket *so, struct sockbuf
        struct mbuf *next;
 
        KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
-       soassertlocked(so);
+       if (so->so_pcb != NULL)
+               soassertlocked(so);
 
        next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
        while (len > 0) {
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.134
diff -u -p -r1.134 uipc_usrreq.c
--- kern/uipc_usrreq.c  9 Jul 2018 10:58:21 -0000       1.134
+++ kern/uipc_usrreq.c  30 Jul 2018 12:46:34 -0000
@@ -336,7 +336,7 @@ uipc_attach(struct socket *so, int proto
 {
        struct unpcb *unp;
        int error;
-       
+
        if (so->so_pcb)
                return EISCONN;
        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
Index: net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.189
diff -u -p -r1.189 pfkeyv2.c
--- net/pfkeyv2.c       10 Jul 2018 20:28:34 -0000      1.189
+++ net/pfkeyv2.c       30 Jul 2018 12:50:33 -0000
@@ -266,23 +266,19 @@ pfkeyv2_attach(struct socket *so, int pr
        if ((so->so_state & SS_PRIV) == 0)
                return EACCES;
 
-       kp = malloc(sizeof(struct pkpcb), M_PCB, M_WAITOK | M_ZERO);
-       so->so_pcb = kp;
-       refcnt_init(&kp->kcb_refcnt);
-
        error = soreserve(so, PFKEYSNDQ, PFKEYRCVQ);
-       if (error) {
-               free(kp, M_PCB, sizeof(struct pkpcb));
-               return (error);
-       }
+       if (error)
+               return error;
 
+       kp = malloc(sizeof(struct pkpcb), M_PCB, M_WAITOK | M_ZERO);
+       refcnt_init(&kp->kcb_refcnt);
        kp->kcb_socket = so;
-
-       so->so_options |= SO_USELOOPBACK;
-       soisconnected(so);
-
        kp->kcb_pid = curproc->p_p->ps_pid;
        kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid);
+
+       so->so_pcb = kp;
+       so->so_state |= SS_ISCONNECTED;
+       so->so_options |= SO_USELOOPBACK;
 
        rw_enter(&pkptable.pkp_lk, RW_WRITE);
        SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp, 
kcb_list);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.279
diff -u -p -r1.279 rtsock.c
--- net/rtsock.c        10 Jul 2018 20:28:34 -0000      1.279
+++ net/rtsock.c        30 Jul 2018 12:51:47 -0000
@@ -288,32 +288,22 @@ route_attach(struct socket *so, int prot
        struct rtpcb    *rop;
        int              error;
 
-       /*
-        * use the rawcb but allocate a rtpcb, this
-        * code does not care about the additional fields
-        * and works directly on the raw socket.
-        */
-       rop = malloc(sizeof(struct rtpcb), M_PCB, M_WAITOK|M_ZERO);
-       so->so_pcb = rop;
-       /* Init the timeout structure */
-       timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
-       refcnt_init(&rop->rop_refcnt);
-
        if (curproc == NULL)
                error = EACCES;
        else
                error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
-       if (error) {
-               free(rop, M_PCB, sizeof(struct rtpcb));
+       if (error)
                return (error);
-       }
 
+       rop = malloc(sizeof(struct rtpcb), M_PCB, M_WAITOK|M_ZERO);
+       timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
+       refcnt_init(&rop->rop_refcnt);
        rop->rop_socket = so;
        rop->rop_proto = proto;
-
        rop->rop_rtableid = curproc->p_p->ps_rtableid;
 
-       soisconnected(so);
+       so->so_pcb = rop;
+       so->so_state |= SS_ISCONNECTED;
        so->so_options |= SO_USELOOPBACK;
 
        rw_enter(&rtptable.rtp_lk, RW_WRITE);
Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.57
diff -u -p -r1.57 ip_divert.c
--- netinet/ip_divert.c 24 Apr 2018 15:40:55 -0000      1.57
+++ netinet/ip_divert.c 30 Jul 2018 12:46:34 -0000
@@ -319,11 +319,13 @@ divert_attach(struct socket *so, int pro
        if ((so->so_state & SS_PRIV) == 0)
                return EACCES;
 
-       error = in_pcballoc(so, &divbtable);
+       error = soreserve(so, divert_sendspace, divert_recvspace);
        if (error)
                return error;
 
-       error = soreserve(so, divert_sendspace, divert_recvspace);
+       NET_LOCK();
+       error = in_pcballoc(so, &divbtable);
+       NET_UNLOCK();
        if (error)
                return error;
 
Index: netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.111
diff -u -p -r1.111 raw_ip.c
--- netinet/raw_ip.c    5 Jul 2018 21:16:52 -0000       1.111
+++ netinet/raw_ip.c    30 Jul 2018 12:46:34 -0000
@@ -516,17 +516,21 @@ rip_attach(struct socket *so, int proto)
        struct inpcb *inp;
        int error;
 
-       if (so->so_pcb)
-               panic("rip_attach");
+       if (so->so_pcb != NULL)
+               return EINVAL;
        if ((so->so_state & SS_PRIV) == 0)
                return EACCES;
        if (proto < 0 || proto >= IPPROTO_MAX)
                return EPROTONOSUPPORT;
 
-       if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
+       error = soreserve(so, rip_sendspace, rip_recvspace);
+       if (error)
                return error;
-       NET_ASSERT_LOCKED();
-       if ((error = in_pcballoc(so, &rawcbtable)))
+
+       NET_LOCK();
+       error = in_pcballoc(so, &rawcbtable);
+       NET_UNLOCK();
+       if (error)
                return error;
        inp = sotoinpcb(so);
        inp->inp_ip.ip_p = proto;
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.169
diff -u -p -r1.169 tcp_usrreq.c
--- netinet/tcp_usrreq.c        11 Jun 2018 07:40:26 -0000      1.169
+++ netinet/tcp_usrreq.c        30 Jul 2018 12:46:34 -0000
@@ -562,9 +562,9 @@ tcp_attach(struct socket *so, int proto)
 {
        struct tcpcb *tp;
        struct inpcb *inp;
-       int error;
+       int error, netlocked = 0;
 
-       if (so->so_pcb)
+       if (so->so_pcb != NULL)
                return EISCONN;
        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 ||
            sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) ||
@@ -574,8 +574,19 @@ tcp_attach(struct socket *so, int proto)
                        return (error);
        }
 
-       NET_ASSERT_LOCKED();
+       /*
+        * XXXSMP As long as the PCB table is protected by the NET_LOCK()
+        * we have to check if it is already held.
+        * That's because tcp_input() calls sonewconn() while holding the
+        * lock.
+        */
+       if (rw_status(&netlock) != RW_WRITE) {
+               NET_LOCK();
+               netlocked = 1;
+       }
        error = in_pcballoc(so, &tcbtable);
+       if (netlocked)
+               NET_UNLOCK();
        if (error)
                return (error);
        inp = sotoinpcb(so);
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.250
diff -u -p -r1.250 udp_usrreq.c
--- netinet/udp_usrreq.c        5 Jul 2018 21:16:52 -0000       1.250
+++ netinet/udp_usrreq.c        30 Jul 2018 12:46:34 -0000
@@ -1217,11 +1217,14 @@ udp_attach(struct socket *so, int proto)
        if (so->so_pcb != NULL)
                return EINVAL;
 
-       if ((error = soreserve(so, udp_sendspace, udp_recvspace)))
+       error = soreserve(so, udp_sendspace, udp_recvspace);
+       if (error)
                return error;
 
-       NET_ASSERT_LOCKED();
-       if ((error = in_pcballoc(so, &udbtable)))
+       NET_LOCK();
+       error = in_pcballoc(so, &udbtable);
+       NET_UNLOCK();
+       if (error)
                return error;
 #ifdef INET6
        if (sotoinpcb(so)->inp_flags & INP_IPV6)
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.56
diff -u -p -r1.56 ip6_divert.c
--- netinet6/ip6_divert.c       24 Apr 2018 15:40:55 -0000      1.56
+++ netinet6/ip6_divert.c       30 Jul 2018 12:46:34 -0000
@@ -323,13 +323,16 @@ divert6_attach(struct socket *so, int pr
        if ((so->so_state & SS_PRIV) == 0)
                return EACCES;
 
-       error = in_pcballoc(so, &divb6table);
+       error = soreserve(so, divert6_sendspace, divert6_recvspace);
        if (error)
                return (error);
 
-       error = soreserve(so, divert6_sendspace, divert6_recvspace);
+       NET_LOCK();
+       error = in_pcballoc(so, &divb6table);
+       NET_UNLOCK();
        if (error)
                return (error);
+
        sotoinpcb(so)->inp_flags |= INP_HDRINCL;
        return (0);
 }
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.129
diff -u -p -r1.129 raw_ip6.c
--- netinet6/raw_ip6.c  5 Jul 2018 21:16:52 -0000       1.129
+++ netinet6/raw_ip6.c  30 Jul 2018 12:46:34 -0000
@@ -706,17 +706,21 @@ rip6_attach(struct socket *so, int proto
        struct inpcb *in6p;
        int error;
 
-       if (so->so_pcb)
-               panic("rip6_attach");
+       if (so->so_pcb != NULL)
+               return EINVAL;
        if ((so->so_state & SS_PRIV) == 0)
-               return (EACCES);
+               return EACCES;
        if (proto < 0 || proto >= IPPROTO_MAX)
                return EPROTONOSUPPORT;
 
-       if ((error = soreserve(so, rip6_sendspace, rip6_recvspace)))
+       error = soreserve(so, rip6_sendspace, rip6_recvspace);
+       if (error)
                return error;
-       NET_ASSERT_LOCKED();
-       if ((error = in_pcballoc(so, &rawin6pcbtable)))
+
+       NET_LOCK();
+       error = in_pcballoc(so, &rawin6pcbtable);
+       NET_UNLOCK();
+       if (error)
                return error;
 
        in6p = sotoinpcb(so);
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.86
diff -u -p -r1.86 socketvar.h
--- sys/socketvar.h     30 Jul 2018 12:22:14 -0000      1.86
+++ sys/socketvar.h     30 Jul 2018 12:47:16 -0000
@@ -145,10 +145,9 @@ struct socket {
 #define        SS_CANTSENDMORE         0x010   /* can't send more data to peer 
*/
 #define        SS_CANTRCVMORE          0x020   /* can't receive more data from 
peer */
 #define        SS_RCVATMARK            0x040   /* at mark on input */
-#define        SS_ISDISCONNECTED       0x800   /* socket disconnected from 
peer */
-
 #define        SS_PRIV                 0x080   /* privileged for broadcast, 
raw... */
 #define        SS_ASYNC                0x200   /* async i/o notify */
+#define        SS_ISDISCONNECTED       0x800   /* socket disconnected from 
peer */
 #define        SS_CONNECTOUT           0x1000  /* connect, not accept, at this 
end */
 #define        SS_ISSENDING            0x2000  /* hint for lower layer */
 #define        SS_DNS                  0x4000  /* created using SOCK_DNS 
socket(2) */

Reply via email to