When a per-protocol attach function is called, the given socket is not
yet reachable, so there's no need to lock it. So the diff below removes
the solock/sounlock dance and shows where the NET_LOCK() is required to
protect some specific global data structures.
I reordered the different blocks in all pr_attach functions for consistency,
but also to be able to relax the assertions in soreserve() & friends.
The interesting bits are in tcp_attach() since that's the only function
which is also called from the packet processing path.
Comments? Oks?
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.226
diff -u -p -r1.226 uipc_socket.c
--- kern/uipc_socket.c 30 Jul 2018 12:22:14 -0000 1.226
+++ kern/uipc_socket.c 30 Jul 2018 12:46:34 -0000
@@ -141,15 +141,14 @@ socreate(int dom, struct socket **aso, i
so->so_cpid = p->p_p->ps_pid;
so->so_proto = prp;
- s = solock(so);
error = (*prp->pr_attach)(so, proto);
if (error) {
+ s = solock(so);
so->so_state |= SS_NOFDREF;
/* sofree() calls sounlock(). */
sofree(so, s);
return (error);
}
- sounlock(so, s);
*aso = so;
return (0);
}
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.96
diff -u -p -r1.96 uipc_socket2.c
--- kern/uipc_socket2.c 10 Jul 2018 10:02:14 -0000 1.96
+++ kern/uipc_socket2.c 30 Jul 2018 12:46:34 -0000
@@ -96,7 +96,9 @@ soisconnected(struct socket *so)
{
struct socket *head = so->so_head;
- soassertlocked(so);
+ if ((so->so_pcb != NULL) || head != NULL)
+ soassertlocked(so);
+
so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
so->so_state |= SS_ISCONNECTED;
if (head && soqremque(so, 0)) {
@@ -148,8 +150,7 @@ sonewconn(struct socket *head, int conns
/*
* XXXSMP as long as `so' and `head' share the same lock, we
- * can call soreserve() and pr_attach() below w/o expliclitly
- * locking `so'.
+ * can call soqinsque() below w/o explicitly locking `so'.
*/
soassertlocked(head);
@@ -189,12 +190,11 @@ sonewconn(struct socket *head, int conns
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
- soqinsque(head, so, soqueue);
if ((*so->so_proto->pr_attach)(so, 0)) {
- (void) soqremque(so, soqueue);
pool_put(&socket_pool, so);
return (NULL);
}
+ soqinsque(head, so, soqueue);
if (connstatus) {
sorwakeup(head);
wakeup(&head->so_timeo);
@@ -448,7 +448,8 @@ sowakeup(struct socket *so, struct sockb
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
- soassertlocked(so);
+ if (so->so_pcb != NULL)
+ soassertlocked(so);
if (sbreserve(so, &so->so_snd, sndcc))
goto bad;
@@ -478,7 +479,8 @@ int
sbreserve(struct socket *so, struct sockbuf *sb, u_long cc)
{
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
- soassertlocked(so);
+ if (so->so_pcb != NULL)
+ soassertlocked(so);
if (cc == 0 || cc > sb_max)
return (1);
@@ -948,7 +950,8 @@ sbdrop(struct socket *so, struct sockbuf
struct mbuf *next;
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
- soassertlocked(so);
+ if (so->so_pcb != NULL)
+ soassertlocked(so);
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
while (len > 0) {
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.134
diff -u -p -r1.134 uipc_usrreq.c
--- kern/uipc_usrreq.c 9 Jul 2018 10:58:21 -0000 1.134
+++ kern/uipc_usrreq.c 30 Jul 2018 12:46:34 -0000
@@ -336,7 +336,7 @@ uipc_attach(struct socket *so, int proto
{
struct unpcb *unp;
int error;
-
+
if (so->so_pcb)
return EISCONN;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
Index: net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.189
diff -u -p -r1.189 pfkeyv2.c
--- net/pfkeyv2.c 10 Jul 2018 20:28:34 -0000 1.189
+++ net/pfkeyv2.c 30 Jul 2018 12:50:33 -0000
@@ -266,23 +266,19 @@ pfkeyv2_attach(struct socket *so, int pr
if ((so->so_state & SS_PRIV) == 0)
return EACCES;
- kp = malloc(sizeof(struct pkpcb), M_PCB, M_WAITOK | M_ZERO);
- so->so_pcb = kp;
- refcnt_init(&kp->kcb_refcnt);
-
error = soreserve(so, PFKEYSNDQ, PFKEYRCVQ);
- if (error) {
- free(kp, M_PCB, sizeof(struct pkpcb));
- return (error);
- }
+ if (error)
+ return error;
+ kp = malloc(sizeof(struct pkpcb), M_PCB, M_WAITOK | M_ZERO);
+ refcnt_init(&kp->kcb_refcnt);
kp->kcb_socket = so;
-
- so->so_options |= SO_USELOOPBACK;
- soisconnected(so);
-
kp->kcb_pid = curproc->p_p->ps_pid;
kp->kcb_rdomain = rtable_l2(curproc->p_p->ps_rtableid);
+
+ so->so_pcb = kp;
+ so->so_state |= SS_ISCONNECTED;
+ so->so_options |= SO_USELOOPBACK;
rw_enter(&pkptable.pkp_lk, RW_WRITE);
SRPL_INSERT_HEAD_LOCKED(&pkptable.pkp_rc, &pkptable.pkp_list, kp,
kcb_list);
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.279
diff -u -p -r1.279 rtsock.c
--- net/rtsock.c 10 Jul 2018 20:28:34 -0000 1.279
+++ net/rtsock.c 30 Jul 2018 12:51:47 -0000
@@ -288,32 +288,22 @@ route_attach(struct socket *so, int prot
struct rtpcb *rop;
int error;
- /*
- * use the rawcb but allocate a rtpcb, this
- * code does not care about the additional fields
- * and works directly on the raw socket.
- */
- rop = malloc(sizeof(struct rtpcb), M_PCB, M_WAITOK|M_ZERO);
- so->so_pcb = rop;
- /* Init the timeout structure */
- timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
- refcnt_init(&rop->rop_refcnt);
-
if (curproc == NULL)
error = EACCES;
else
error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
- if (error) {
- free(rop, M_PCB, sizeof(struct rtpcb));
+ if (error)
return (error);
- }
+ rop = malloc(sizeof(struct rtpcb), M_PCB, M_WAITOK|M_ZERO);
+ timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
+ refcnt_init(&rop->rop_refcnt);
rop->rop_socket = so;
rop->rop_proto = proto;
-
rop->rop_rtableid = curproc->p_p->ps_rtableid;
- soisconnected(so);
+ so->so_pcb = rop;
+ so->so_state |= SS_ISCONNECTED;
so->so_options |= SO_USELOOPBACK;
rw_enter(&rtptable.rtp_lk, RW_WRITE);
Index: netinet/ip_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.57
diff -u -p -r1.57 ip_divert.c
--- netinet/ip_divert.c 24 Apr 2018 15:40:55 -0000 1.57
+++ netinet/ip_divert.c 30 Jul 2018 12:46:34 -0000
@@ -319,11 +319,13 @@ divert_attach(struct socket *so, int pro
if ((so->so_state & SS_PRIV) == 0)
return EACCES;
- error = in_pcballoc(so, &divbtable);
+ error = soreserve(so, divert_sendspace, divert_recvspace);
if (error)
return error;
- error = soreserve(so, divert_sendspace, divert_recvspace);
+ NET_LOCK();
+ error = in_pcballoc(so, &divbtable);
+ NET_UNLOCK();
if (error)
return error;
Index: netinet/raw_ip.c
===================================================================
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.111
diff -u -p -r1.111 raw_ip.c
--- netinet/raw_ip.c 5 Jul 2018 21:16:52 -0000 1.111
+++ netinet/raw_ip.c 30 Jul 2018 12:46:34 -0000
@@ -516,17 +516,21 @@ rip_attach(struct socket *so, int proto)
struct inpcb *inp;
int error;
- if (so->so_pcb)
- panic("rip_attach");
+ if (so->so_pcb != NULL)
+ return EINVAL;
if ((so->so_state & SS_PRIV) == 0)
return EACCES;
if (proto < 0 || proto >= IPPROTO_MAX)
return EPROTONOSUPPORT;
- if ((error = soreserve(so, rip_sendspace, rip_recvspace)))
+ error = soreserve(so, rip_sendspace, rip_recvspace);
+ if (error)
return error;
- NET_ASSERT_LOCKED();
- if ((error = in_pcballoc(so, &rawcbtable)))
+
+ NET_LOCK();
+ error = in_pcballoc(so, &rawcbtable);
+ NET_UNLOCK();
+ if (error)
return error;
inp = sotoinpcb(so);
inp->inp_ip.ip_p = proto;
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.169
diff -u -p -r1.169 tcp_usrreq.c
--- netinet/tcp_usrreq.c 11 Jun 2018 07:40:26 -0000 1.169
+++ netinet/tcp_usrreq.c 30 Jul 2018 12:46:34 -0000
@@ -562,9 +562,9 @@ tcp_attach(struct socket *so, int proto)
{
struct tcpcb *tp;
struct inpcb *inp;
- int error;
+ int error, netlocked = 0;
- if (so->so_pcb)
+ if (so->so_pcb != NULL)
return EISCONN;
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 ||
sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) ||
@@ -574,8 +574,19 @@ tcp_attach(struct socket *so, int proto)
return (error);
}
- NET_ASSERT_LOCKED();
+ /*
+ * XXXSMP As long as the PCB table is protected by the NET_LOCK()
+ * we have to check if it is already held.
+ * That's because tcp_input() calls sonewconn() while holding the
+ * lock.
+ */
+ if (rw_status(&netlock) != RW_WRITE) {
+ NET_LOCK();
+ netlocked = 1;
+ }
error = in_pcballoc(so, &tcbtable);
+ if (netlocked)
+ NET_UNLOCK();
if (error)
return (error);
inp = sotoinpcb(so);
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.250
diff -u -p -r1.250 udp_usrreq.c
--- netinet/udp_usrreq.c 5 Jul 2018 21:16:52 -0000 1.250
+++ netinet/udp_usrreq.c 30 Jul 2018 12:46:34 -0000
@@ -1217,11 +1217,14 @@ udp_attach(struct socket *so, int proto)
if (so->so_pcb != NULL)
return EINVAL;
- if ((error = soreserve(so, udp_sendspace, udp_recvspace)))
+ error = soreserve(so, udp_sendspace, udp_recvspace);
+ if (error)
return error;
- NET_ASSERT_LOCKED();
- if ((error = in_pcballoc(so, &udbtable)))
+ NET_LOCK();
+ error = in_pcballoc(so, &udbtable);
+ NET_UNLOCK();
+ if (error)
return error;
#ifdef INET6
if (sotoinpcb(so)->inp_flags & INP_IPV6)
Index: netinet6/ip6_divert.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.56
diff -u -p -r1.56 ip6_divert.c
--- netinet6/ip6_divert.c 24 Apr 2018 15:40:55 -0000 1.56
+++ netinet6/ip6_divert.c 30 Jul 2018 12:46:34 -0000
@@ -323,13 +323,16 @@ divert6_attach(struct socket *so, int pr
if ((so->so_state & SS_PRIV) == 0)
return EACCES;
- error = in_pcballoc(so, &divb6table);
+ error = soreserve(so, divert6_sendspace, divert6_recvspace);
if (error)
return (error);
- error = soreserve(so, divert6_sendspace, divert6_recvspace);
+ NET_LOCK();
+ error = in_pcballoc(so, &divb6table);
+ NET_UNLOCK();
if (error)
return (error);
+
sotoinpcb(so)->inp_flags |= INP_HDRINCL;
return (0);
}
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.129
diff -u -p -r1.129 raw_ip6.c
--- netinet6/raw_ip6.c 5 Jul 2018 21:16:52 -0000 1.129
+++ netinet6/raw_ip6.c 30 Jul 2018 12:46:34 -0000
@@ -706,17 +706,21 @@ rip6_attach(struct socket *so, int proto
struct inpcb *in6p;
int error;
- if (so->so_pcb)
- panic("rip6_attach");
+ if (so->so_pcb != NULL)
+ return EINVAL;
if ((so->so_state & SS_PRIV) == 0)
- return (EACCES);
+ return EACCES;
if (proto < 0 || proto >= IPPROTO_MAX)
return EPROTONOSUPPORT;
- if ((error = soreserve(so, rip6_sendspace, rip6_recvspace)))
+ error = soreserve(so, rip6_sendspace, rip6_recvspace);
+ if (error)
return error;
- NET_ASSERT_LOCKED();
- if ((error = in_pcballoc(so, &rawin6pcbtable)))
+
+ NET_LOCK();
+ error = in_pcballoc(so, &rawin6pcbtable);
+ NET_UNLOCK();
+ if (error)
return error;
in6p = sotoinpcb(so);
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.86
diff -u -p -r1.86 socketvar.h
--- sys/socketvar.h 30 Jul 2018 12:22:14 -0000 1.86
+++ sys/socketvar.h 30 Jul 2018 12:47:16 -0000
@@ -145,10 +145,9 @@ struct socket {
#define SS_CANTSENDMORE 0x010 /* can't send more data to peer
*/
#define SS_CANTRCVMORE 0x020 /* can't receive more data from
peer */
#define SS_RCVATMARK 0x040 /* at mark on input */
-#define SS_ISDISCONNECTED 0x800 /* socket disconnected from
peer */
-
#define SS_PRIV 0x080 /* privileged for broadcast,
raw... */
#define SS_ASYNC 0x200 /* async i/o notify */
+#define SS_ISDISCONNECTED 0x800 /* socket disconnected from
peer */
#define SS_CONNECTOUT 0x1000 /* connect, not accept, at this
end */
#define SS_ISSENDING 0x2000 /* hint for lower layer */
#define SS_DNS 0x4000 /* created using SOCK_DNS
socket(2) */