Hi,

To run over all PCBs, I have introduced temporary notify lists.  They
are necessary as the list of all PCBs is protected by a mutex, which
must not be held while sleeping.  Notify may eventually call
ip_output(), which may sleep as the pf lock is a rwlock.  Also the SRP
implementation of the route table might sleep.

The same inp_notify list entry is used for UDP multicast and raw IP
delivery.  The lock for those paths could be a mutex, but use the same
list entry and rwlock anyway.

Currently the inp_notify list is protected by the exclusive netlock.
Introduce a new rwlock per PCB table to relax this requirement to
the shared netlock.

ok?

bluhm

Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.271
diff -u -p -r1.271 in_pcb.c
--- netinet/in_pcb.c    21 Aug 2022 11:44:53 -0000      1.271
+++ netinet/in_pcb.c    22 Aug 2022 07:07:35 -0000
@@ -175,6 +175,7 @@ void
 in_pcbinit(struct inpcbtable *table, int hashsize)
 {
        mtx_init(&table->inpt_mtx, IPL_SOFTNET);
+       rw_init(&table->inpt_notify, "inpnotify");
        TAILQ_INIT(&table->inpt_queue);
        table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_WAITOK,
            &table->inpt_mask);
@@ -696,8 +697,6 @@ in_pcbnotifyall(struct inpcbtable *table
        struct in_addr faddr;
        u_int rdomain;
 
-       NET_ASSERT_LOCKED_EXCLUSIVE();
-
        if (dst->sa_family != AF_INET)
                return;
        faddr = satosin(dst)->sin_addr;
@@ -708,6 +707,7 @@ in_pcbnotifyall(struct inpcbtable *table
 
        SIMPLEQ_INIT(&inpcblist);
        rdomain = rtable_l2(rtable);
+       rw_enter_write(&table->inpt_notify);
        mtx_enter(&table->inpt_mtx);
        TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
 #ifdef INET6
@@ -729,6 +729,7 @@ in_pcbnotifyall(struct inpcbtable *table
                (*notify)(inp, errno);
                in_pcbunref(inp);
        }
+       rw_exit_write(&table->inpt_notify);
 }
 
 /*
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.130
diff -u -p -r1.130 in_pcb.h
--- netinet/in_pcb.h    21 Aug 2022 11:44:53 -0000      1.130
+++ netinet/in_pcb.h    22 Aug 2022 07:07:35 -0000
@@ -66,6 +66,7 @@
 
 #include <sys/queue.h>
 #include <sys/mutex.h>
+#include <sys/rwlock.h>
 #include <sys/refcnt.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
@@ -79,6 +80,7 @@
  *     I       immutable after creation
  *     N       net lock
  *     t       inpt_mtx                pcb table mutex
+ *     y       inpt_notify             pcb table rwlock for notify
  *     p       inpcb_mtx               pcb mutex
  */
 
@@ -103,7 +105,7 @@ struct inpcb {
        LIST_ENTRY(inpcb) inp_hash;             /* [t] local and foreign hash */
        LIST_ENTRY(inpcb) inp_lhash;            /* [t] local port hash */
        TAILQ_ENTRY(inpcb) inp_queue;           /* [t] inet PCB queue */
-       SIMPLEQ_ENTRY(inpcb) inp_notify;        /* [N] notify or udp append */
+       SIMPLEQ_ENTRY(inpcb) inp_notify;        /* [y] notify or udp append */
        struct    inpcbtable *inp_table;        /* [I] inet queue/hash table */
        union     inpaddru inp_faddru;          /* Foreign address. */
        union     inpaddru inp_laddru;          /* Local address. */
@@ -166,6 +168,7 @@ LIST_HEAD(inpcbhead, inpcb);
 
 struct inpcbtable {
        struct mutex inpt_mtx;                  /* protect queue and hash */
+       struct rwlock inpt_notify;              /* protect inp_notify list */
        TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
        struct  inpcbhead *inpt_hashtbl;        /* [t] local and foreign hash */
        struct  inpcbhead *inpt_lhashtbl;       /* [t] local port hash */
Index: netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.133
diff -u -p -r1.133 raw_ip.c
--- netinet/raw_ip.c    21 Aug 2022 22:45:55 -0000      1.133
+++ netinet/raw_ip.c    22 Aug 2022 07:07:35 -0000
@@ -160,8 +160,8 @@ rip_input(struct mbuf **mp, int *offp, i
                }
        }
 #endif
-       NET_ASSERT_LOCKED_EXCLUSIVE();
        SIMPLEQ_INIT(&inpcblist);
+       rw_enter_write(&rawcbtable.inpt_notify);
        mtx_enter(&rawcbtable.inpt_mtx);
        TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
                if (inp->inp_socket->so_state & SS_CANTRCVMORE)
@@ -189,6 +189,8 @@ rip_input(struct mbuf **mp, int *offp, i
        mtx_leave(&rawcbtable.inpt_mtx);
 
        if (SIMPLEQ_EMPTY(&inpcblist)) {
+               rw_exit_write(&rawcbtable.inpt_notify);
+
                if (ip->ip_p != IPPROTO_ICMP)
                        icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
                            0, 0);
@@ -199,6 +201,8 @@ rip_input(struct mbuf **mp, int *offp, i
                counters[ips_noproto]++;
                counters[ips_delivered]--;
                counters_leave(&ref, ipcounters);
+
+               return IPPROTO_DONE;
        }
 
        while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
@@ -224,6 +228,8 @@ rip_input(struct mbuf **mp, int *offp, i
                }
                in_pcbunref(inp);
        }
+       rw_exit_write(&rawcbtable.inpt_notify);
+
        return IPPROTO_DONE;
 }
 
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.285
diff -u -p -r1.285 udp_usrreq.c
--- netinet/udp_usrreq.c        21 Aug 2022 22:45:55 -0000      1.285
+++ netinet/udp_usrreq.c        22 Aug 2022 07:14:17 -0000
@@ -372,8 +372,8 @@ udp_input(struct mbuf **mp, int *offp, i
                 * Locate pcb(s) for datagram.
                 * (Algorithm copied from raw_intr().)
                 */
-               NET_ASSERT_LOCKED_EXCLUSIVE();
                SIMPLEQ_INIT(&inpcblist);
+               rw_enter_write(&udbtable.inpt_notify);
                mtx_enter(&udbtable.inpt_mtx);
                TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
                        if (inp->inp_socket->so_state & SS_CANTRCVMORE)
@@ -446,6 +446,8 @@ udp_input(struct mbuf **mp, int *offp, i
                mtx_leave(&udbtable.inpt_mtx);
 
                if (SIMPLEQ_EMPTY(&inpcblist)) {
+                       rw_exit_write(&udbtable.inpt_notify);
+
                        /*
                         * No matching pcb found; discard datagram.
                         * (No need to send an ICMP Port Unreachable
@@ -469,6 +471,8 @@ udp_input(struct mbuf **mp, int *offp, i
                        }
                        in_pcbunref(inp);
                }
+               rw_exit_write(&udbtable.inpt_notify);
+
                return IPPROTO_DONE;
        }
        /*
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.119
diff -u -p -r1.119 in6_pcb.c
--- netinet6/in6_pcb.c  8 Aug 2022 12:06:31 -0000       1.119
+++ netinet6/in6_pcb.c  22 Aug 2022 07:07:35 -0000
@@ -387,8 +387,6 @@ in6_pcbnotify(struct inpcbtable *table, 
        u_int32_t flowinfo;
        u_int rdomain;
 
-       NET_ASSERT_LOCKED_EXCLUSIVE();
-
        if ((unsigned)cmd >= PRC_NCMDS)
                return;
 
@@ -430,6 +428,7 @@ in6_pcbnotify(struct inpcbtable *table, 
 
        SIMPLEQ_INIT(&inpcblist);
        rdomain = rtable_l2(rtable);
+       rw_enter_write(&table->inpt_notify);
        mtx_enter(&table->inpt_mtx);
        TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
                if ((inp->inp_flags & INP_IPV6) == 0)
@@ -513,6 +512,7 @@ in6_pcbnotify(struct inpcbtable *table, 
                (*notify)(inp, errno);
                in_pcbunref(inp);
        }
+       rw_exit_write(&table->inpt_notify);
 }
 
 struct inpcb *
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.153
diff -u -p -r1.153 raw_ip6.c
--- netinet6/raw_ip6.c  21 Aug 2022 22:45:55 -0000      1.153
+++ netinet6/raw_ip6.c  22 Aug 2022 07:07:35 -0000
@@ -172,8 +172,8 @@ rip6_input(struct mbuf **mp, int *offp, 
                }
        }
 #endif
-       NET_ASSERT_LOCKED_EXCLUSIVE();
        SIMPLEQ_INIT(&inpcblist);
+       rw_enter_write(&rawin6pcbtable.inpt_notify);
        mtx_enter(&rawin6pcbtable.inpt_mtx);
        TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
                if (in6p->inp_socket->so_state & SS_CANTRCVMORE)
@@ -224,6 +224,8 @@ rip6_input(struct mbuf **mp, int *offp, 
                struct counters_ref ref;
                uint64_t *counters;
 
+               rw_exit_write(&rawin6pcbtable.inpt_notify);
+
                if (proto != IPPROTO_ICMPV6) {
                        rip6stat_inc(rip6s_nosock);
                        if (m->m_flags & M_MCAST)
@@ -240,6 +242,8 @@ rip6_input(struct mbuf **mp, int *offp, 
                counters = counters_enter(&ref, ip6counters);
                counters[ip6s_delivered]--;
                counters_leave(&ref, ip6counters);
+
+               return IPPROTO_DONE;
        }
 
        while ((in6p = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
@@ -267,6 +271,8 @@ rip6_input(struct mbuf **mp, int *offp, 
                }
                in_pcbunref(in6p);
        }
+       rw_exit_write(&rawin6pcbtable.inpt_notify);
+
        return IPPROTO_DONE;
 }
 

Reply via email to