On Thu, Aug 04, 2022 at 06:07:49PM +0300, Vitaliy Makkoveev wrote:
> Yes please. Also I like (*pr_usrreq)() be splitted to multiple handlers.
With all you feedback diff looks like this now.
bluhm
Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.280
diff -u -p -r1.280 uipc_socket.c
--- kern/uipc_socket.c 25 Jul 2022 07:28:22 -0000 1.280
+++ kern/uipc_socket.c 4 Aug 2022 21:26:33 -0000
@@ -836,10 +836,10 @@ bad:
if (mp)
*mp = NULL;
- solock(so);
+ solock_shared(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
@@ -907,7 +907,7 @@ restart:
sbunlock(so, &so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
goto restart;
@@ -976,11 +976,11 @@ dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
- sounlock(so);
+ sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
- solock(so);
+ solock_shared(so);
}
*controlp = cm;
} else {
@@ -1054,9 +1054,9 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
- sounlock(so);
+ sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
- solock(so);
+ solock_shared(so);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -1140,7 +1140,7 @@ dontblock:
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
- sounlock(so);
+ sounlock_shared(so);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -1186,7 +1186,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
Index: kern/uipc_socket2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.126
diff -u -p -r1.126 uipc_socket2.c
--- kern/uipc_socket2.c 25 Jul 2022 07:28:22 -0000 1.126
+++ kern/uipc_socket2.c 4 Aug 2022 21:45:24 -0000
@@ -360,6 +360,24 @@ solock(struct socket *so)
}
}
+void
+solock_shared(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (so->so_proto->pr_lock != NULL) {
+ NET_RLOCK_IN_SYSCALL();
+ (*so->so_proto->pr_lock)(so);
+ } else
+ NET_LOCK();
+ break;
+ default:
+ rw_enter_write(&so->so_lock);
+ break;
+ }
+}
+
int
solock_persocket(struct socket *so)
{
@@ -403,6 +421,24 @@ sounlock(struct socket *so)
}
void
+sounlock_shared(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (so->so_proto->pr_unlock != NULL) {
+ (*so->so_proto->pr_unlock)(so);
+ NET_RUNLOCK_IN_SYSCALL();
+ } else
+ NET_UNLOCK();
+ break;
+ default:
+ rw_exit_write(&so->so_lock);
+ break;
+ }
+}
+
+void
soassertlocked(struct socket *so)
{
switch (so->so_proto->pr_domain->dom_family) {
@@ -425,7 +461,15 @@ sosleep_nsec(struct socket *so, void *id
switch (so->so_proto->pr_domain->dom_family) {
case PF_INET:
case PF_INET6:
+ if (so->so_proto->pr_unlock != NULL &&
+ rw_status(&netlock) == RW_READ) {
+ (*so->so_proto->pr_unlock)(so);
+ }
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
+ if (so->so_proto->pr_lock != NULL &&
+ rw_status(&netlock) == RW_READ) {
+ (*so->so_proto->pr_lock)(so);
+ }
break;
default:
ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.268
diff -u -p -r1.268 in_pcb.c
--- netinet/in_pcb.c 28 Jun 2022 09:32:27 -0000 1.268
+++ netinet/in_pcb.c 4 Aug 2022 21:26:33 -0000
@@ -236,6 +236,7 @@ in_pcballoc(struct socket *so, struct in
inp->inp_table = table;
inp->inp_socket = so;
refcnt_init_trace(&inp->inp_refcnt, DT_REFCNT_IDX_INPCB);
+ mtx_init(&inp->inp_mtx, IPL_SOFTNET);
inp->inp_seclevel[SL_AUTH] = IPSEC_AUTH_LEVEL_DEFAULT;
inp->inp_seclevel[SL_ESP_TRANS] = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.129
diff -u -p -r1.129 in_pcb.h
--- netinet/in_pcb.h 15 May 2022 09:12:20 -0000 1.129
+++ netinet/in_pcb.h 4 Aug 2022 21:26:33 -0000
@@ -79,6 +79,7 @@
* I immutable after creation
* N net lock
* t inpt_mtx pcb table mutex
+ * p inpcb_mtx pcb mutex
*/
struct pf_state_key;
@@ -121,6 +122,7 @@ struct inpcb {
#define inp_route inp_ru.ru_route
#define inp_route6 inp_ru.ru_route6
struct refcnt inp_refcnt; /* refcount PCB, delay memory free */
+ struct mutex inp_mtx; /* protect PCB and socket members */
int inp_flags; /* generic IP/datagram flags */
union { /* Header prototype. */
struct ip hu_ip;
Index: netinet/in_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.98
diff -u -p -r1.98 in_proto.c
--- netinet/in_proto.c 25 Feb 2022 23:51:03 -0000 1.98
+++ netinet/in_proto.c 4 Aug 2022 21:44:07 -0000
@@ -387,6 +387,8 @@ const struct protosw inetsw[] = {
.pr_usrreq = divert_usrreq,
.pr_attach = divert_attach,
.pr_detach = divert_detach,
+ .pr_lock = divert_lock,
+ .pr_unlock = divert_unlock,
.pr_init = divert_init,
.pr_sysctl = divert_sysctl
},
Index: netinet/ip_divert.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.68
diff -u -p -r1.68 ip_divert.c
--- netinet/ip_divert.c 9 May 2022 19:33:46 -0000 1.68
+++ netinet/ip_divert.c 4 Aug 2022 21:31:30 -0000
@@ -221,22 +221,15 @@ divert_packet(struct mbuf *m, int dir, u
if_put(ifp);
}
+ mtx_enter(&inp->inp_mtx);
so = inp->inp_socket;
- /*
- * XXXSMP sbappendaddr() is not MP safe and this function is called
- * from pf with shared netlock. To call only one sbappendaddr() from
- * divert_packet(), protect it with kernel lock. All other places
- * call sbappendaddr() with exclusive net lock. This blocks
- * divert_packet() as we have the shared lock.
- */
- KERNEL_LOCK();
if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
- KERNEL_UNLOCK();
+ mtx_leave(&inp->inp_mtx);
divstat_inc(divs_fullsock);
goto bad;
}
- sorwakeup(inp->inp_socket);
- KERNEL_UNLOCK();
+ mtx_leave(&inp->inp_mtx);
+ sorwakeup(so);
in_pcbunref(inp);
return;
@@ -356,6 +349,24 @@ divert_detach(struct socket *so)
in_pcbdetach(inp);
return (0);
+}
+
+void
+divert_lock(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+
+ NET_ASSERT_LOCKED();
+ mtx_enter(&inp->inp_mtx);
+}
+
+void
+divert_unlock(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+
+ NET_ASSERT_LOCKED();
+ mtx_leave(&inp->inp_mtx);
}
int
Index: netinet/ip_divert.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.h,v
retrieving revision 1.15
diff -u -p -r1.15 ip_divert.h
--- netinet/ip_divert.h 5 May 2022 16:44:22 -0000 1.15
+++ netinet/ip_divert.h 4 Aug 2022 21:43:44 -0000
@@ -72,5 +72,7 @@ int divert_usrreq(struct socket *,
int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *);
int divert_attach(struct socket *, int);
int divert_detach(struct socket *);
+void divert_lock(struct socket *);
+void divert_unlock(struct socket *);
#endif /* _KERNEL */
#endif /* _IP_DIVERT_H_ */
Index: sys/protosw.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v
retrieving revision 1.35
diff -u -p -r1.35 protosw.h
--- sys/protosw.h 25 Feb 2022 23:51:04 -0000 1.35
+++ sys/protosw.h 4 Aug 2022 21:41:19 -0000
@@ -83,6 +83,8 @@ struct protosw {
int (*pr_attach)(struct socket *, int);
int (*pr_detach)(struct socket *);
+ void (*pr_lock)(struct socket *);
+ void (*pr_unlock)(struct socket *);
/* utility hooks */
void (*pr_init)(void); /* initialization hook */
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.106
diff -u -p -r1.106 socketvar.h
--- sys/socketvar.h 15 Jul 2022 17:20:24 -0000 1.106
+++ sys/socketvar.h 4 Aug 2022 21:26:33 -0000
@@ -346,9 +346,11 @@ int sockargs(struct mbuf **, const void
int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
void solock(struct socket *);
+void solock_shared(struct socket *);
int solock_persocket(struct socket *);
void solock_pair(struct socket *, struct socket *);
void sounlock(struct socket *);
+void sounlock_shared(struct socket *);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
Index: sys/systm.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/systm.h,v
retrieving revision 1.157
diff -u -p -r1.157 systm.h
--- sys/systm.h 12 Jul 2022 17:12:31 -0000 1.157
+++ sys/systm.h 4 Aug 2022 21:31:39 -0000
@@ -341,6 +341,15 @@ extern struct rwlock netlock;
#define NET_RLOCK_IN_IOCTL() do { rw_enter_read(&netlock); } while
(0)
#define NET_RUNLOCK_IN_IOCTL() do { rw_exit_read(&netlock); } while (0)
+/*
+ * Reader version of NET_LOCK() to be used in send and receive syscall.
+ *
+ * Can be grabbed instead of the exclusive version when no field
+ * protected by the NET_LOCK() is modified by the ioctl/sysctl.
+ */
+#define NET_RLOCK_IN_SYSCALL() do { rw_enter_read(&netlock); } while
(0)
+#define NET_RUNLOCK_IN_SYSCALL() do { rw_exit_read(&netlock); } while
(0)
+
#ifdef DIAGNOSTIC
#define NET_ASSERT_UNLOCKED()
\