On Mon, Aug 22, 2022 at 09:38:15AM +0200, Alexander Bluhm wrote:
> Hi,
> 
> To iterate over all PCBs, I have introduced temporary notify lists.
> They are necessary because the list of all PCBs is protected by a
> mutex.  Notify may eventually call ip_output(), which may sleep as
> the pf lock is an rwlock.  Also, the SRP implementation of the route
> table might sleep.
> 
> The same inp_notify list entry is used for UDP multicast and raw IP
> delivery.  Those lists could be protected by a mutex, but use the
> same list entry and rwlock anyway.
> 
> Currently the inp_notify list is protected by the exclusive netlock.
> Introduce a new rwlock per PCB table to relax this requirement to
> the shared netlock.
> 
> ok?
>

ok mvs@

> bluhm
> 
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.271
> diff -u -p -r1.271 in_pcb.c
> --- netinet/in_pcb.c  21 Aug 2022 11:44:53 -0000      1.271
> +++ netinet/in_pcb.c  22 Aug 2022 07:07:35 -0000
> @@ -175,6 +175,7 @@ void
>  in_pcbinit(struct inpcbtable *table, int hashsize)
>  {
>       mtx_init(&table->inpt_mtx, IPL_SOFTNET);
> +     rw_init(&table->inpt_notify, "inpnotify");
>       TAILQ_INIT(&table->inpt_queue);
>       table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_WAITOK,
>           &table->inpt_mask);
> @@ -696,8 +697,6 @@ in_pcbnotifyall(struct inpcbtable *table
>       struct in_addr faddr;
>       u_int rdomain;
>  
> -     NET_ASSERT_LOCKED_EXCLUSIVE();
> -
>       if (dst->sa_family != AF_INET)
>               return;
>       faddr = satosin(dst)->sin_addr;
> @@ -708,6 +707,7 @@ in_pcbnotifyall(struct inpcbtable *table
>  
>       SIMPLEQ_INIT(&inpcblist);
>       rdomain = rtable_l2(rtable);
> +     rw_enter_write(&table->inpt_notify);
>       mtx_enter(&table->inpt_mtx);
>       TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
>  #ifdef INET6
> @@ -729,6 +729,7 @@ in_pcbnotifyall(struct inpcbtable *table
>               (*notify)(inp, errno);
>               in_pcbunref(inp);
>       }
> +     rw_exit_write(&table->inpt_notify);
>  }
>  
>  /*
> Index: netinet/in_pcb.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> retrieving revision 1.130
> diff -u -p -r1.130 in_pcb.h
> --- netinet/in_pcb.h  21 Aug 2022 11:44:53 -0000      1.130
> +++ netinet/in_pcb.h  22 Aug 2022 07:07:35 -0000
> @@ -66,6 +66,7 @@
>  
>  #include <sys/queue.h>
>  #include <sys/mutex.h>
> +#include <sys/rwlock.h>
>  #include <sys/refcnt.h>
>  #include <netinet/ip6.h>
>  #include <netinet6/ip6_var.h>
> @@ -79,6 +80,7 @@
>   *   I       immutable after creation
>   *   N       net lock
>   *   t       inpt_mtx                pcb table mutex
> + *   y       inpt_notify             pcb table rwlock for notify
>   *   p       inpcb_mtx               pcb mutex
>   */
>  
> @@ -103,7 +105,7 @@ struct inpcb {
>       LIST_ENTRY(inpcb) inp_hash;             /* [t] local and foreign hash */
>       LIST_ENTRY(inpcb) inp_lhash;            /* [t] local port hash */
>       TAILQ_ENTRY(inpcb) inp_queue;           /* [t] inet PCB queue */
> -     SIMPLEQ_ENTRY(inpcb) inp_notify;        /* [N] notify or udp append */
> +     SIMPLEQ_ENTRY(inpcb) inp_notify;        /* [y] notify or udp append */
>       struct    inpcbtable *inp_table;        /* [I] inet queue/hash table */
>       union     inpaddru inp_faddru;          /* Foreign address. */
>       union     inpaddru inp_laddru;          /* Local address. */
> @@ -166,6 +168,7 @@ LIST_HEAD(inpcbhead, inpcb);
>  
>  struct inpcbtable {
>       struct mutex inpt_mtx;                  /* protect queue and hash */
> +     struct rwlock inpt_notify;              /* protect inp_notify list */
>       TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
>       struct  inpcbhead *inpt_hashtbl;        /* [t] local and foreign hash */
>       struct  inpcbhead *inpt_lhashtbl;       /* [t] local port hash */
> Index: netinet/raw_ip.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 raw_ip.c
> --- netinet/raw_ip.c  21 Aug 2022 22:45:55 -0000      1.133
> +++ netinet/raw_ip.c  22 Aug 2022 07:07:35 -0000
> @@ -160,8 +160,8 @@ rip_input(struct mbuf **mp, int *offp, i
>               }
>       }
>  #endif
> -     NET_ASSERT_LOCKED_EXCLUSIVE();
>       SIMPLEQ_INIT(&inpcblist);
> +     rw_enter_write(&rawcbtable.inpt_notify);
>       mtx_enter(&rawcbtable.inpt_mtx);
>       TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
>               if (inp->inp_socket->so_state & SS_CANTRCVMORE)
> @@ -189,6 +189,8 @@ rip_input(struct mbuf **mp, int *offp, i
>       mtx_leave(&rawcbtable.inpt_mtx);
>  
>       if (SIMPLEQ_EMPTY(&inpcblist)) {
> +             rw_exit_write(&rawcbtable.inpt_notify);
> +
>               if (ip->ip_p != IPPROTO_ICMP)
>                       icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
>                           0, 0);
> @@ -199,6 +201,8 @@ rip_input(struct mbuf **mp, int *offp, i
>               counters[ips_noproto]++;
>               counters[ips_delivered]--;
>               counters_leave(&ref, ipcounters);
> +
> +             return IPPROTO_DONE;
>       }
>  
>       while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> @@ -224,6 +228,8 @@ rip_input(struct mbuf **mp, int *offp, i
>               }
>               in_pcbunref(inp);
>       }
> +     rw_exit_write(&rawcbtable.inpt_notify);
> +
>       return IPPROTO_DONE;
>  }
>  
> Index: netinet/udp_usrreq.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> retrieving revision 1.285
> diff -u -p -r1.285 udp_usrreq.c
> --- netinet/udp_usrreq.c      21 Aug 2022 22:45:55 -0000      1.285
> +++ netinet/udp_usrreq.c      22 Aug 2022 07:14:17 -0000
> @@ -372,8 +372,8 @@ udp_input(struct mbuf **mp, int *offp, i
>                * Locate pcb(s) for datagram.
>                * (Algorithm copied from raw_intr().)
>                */
> -             NET_ASSERT_LOCKED_EXCLUSIVE();
>               SIMPLEQ_INIT(&inpcblist);
> +             rw_enter_write(&udbtable.inpt_notify);
>               mtx_enter(&udbtable.inpt_mtx);
>               TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
>                       if (inp->inp_socket->so_state & SS_CANTRCVMORE)
> @@ -446,6 +446,8 @@ udp_input(struct mbuf **mp, int *offp, i
>               mtx_leave(&udbtable.inpt_mtx);
>  
>               if (SIMPLEQ_EMPTY(&inpcblist)) {
> +                     rw_exit_write(&udbtable.inpt_notify);
> +
>                       /*
>                        * No matching pcb found; discard datagram.
>                        * (No need to send an ICMP Port Unreachable
> @@ -469,6 +471,8 @@ udp_input(struct mbuf **mp, int *offp, i
>                       }
>                       in_pcbunref(inp);
>               }
> +             rw_exit_write(&udbtable.inpt_notify);
> +
>               return IPPROTO_DONE;
>       }
>       /*
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> retrieving revision 1.119
> diff -u -p -r1.119 in6_pcb.c
> --- netinet6/in6_pcb.c        8 Aug 2022 12:06:31 -0000       1.119
> +++ netinet6/in6_pcb.c        22 Aug 2022 07:07:35 -0000
> @@ -387,8 +387,6 @@ in6_pcbnotify(struct inpcbtable *table, 
>       u_int32_t flowinfo;
>       u_int rdomain;
>  
> -     NET_ASSERT_LOCKED_EXCLUSIVE();
> -
>       if ((unsigned)cmd >= PRC_NCMDS)
>               return;
>  
> @@ -430,6 +428,7 @@ in6_pcbnotify(struct inpcbtable *table, 
>  
>       SIMPLEQ_INIT(&inpcblist);
>       rdomain = rtable_l2(rtable);
> +     rw_enter_write(&table->inpt_notify);
>       mtx_enter(&table->inpt_mtx);
>       TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
>               if ((inp->inp_flags & INP_IPV6) == 0)
> @@ -513,6 +512,7 @@ in6_pcbnotify(struct inpcbtable *table, 
>               (*notify)(inp, errno);
>               in_pcbunref(inp);
>       }
> +     rw_exit_write(&table->inpt_notify);
>  }
>  
>  struct inpcb *
> Index: netinet6/raw_ip6.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
> retrieving revision 1.153
> diff -u -p -r1.153 raw_ip6.c
> --- netinet6/raw_ip6.c        21 Aug 2022 22:45:55 -0000      1.153
> +++ netinet6/raw_ip6.c        22 Aug 2022 07:07:35 -0000
> @@ -172,8 +172,8 @@ rip6_input(struct mbuf **mp, int *offp, 
>               }
>       }
>  #endif
> -     NET_ASSERT_LOCKED_EXCLUSIVE();
>       SIMPLEQ_INIT(&inpcblist);
> +     rw_enter_write(&rawin6pcbtable.inpt_notify);
>       mtx_enter(&rawin6pcbtable.inpt_mtx);
>       TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
>               if (in6p->inp_socket->so_state & SS_CANTRCVMORE)
> @@ -224,6 +224,8 @@ rip6_input(struct mbuf **mp, int *offp, 
>               struct counters_ref ref;
>               uint64_t *counters;
>  
> +             rw_exit_write(&rawin6pcbtable.inpt_notify);
> +
>               if (proto != IPPROTO_ICMPV6) {
>                       rip6stat_inc(rip6s_nosock);
>                       if (m->m_flags & M_MCAST)
> @@ -240,6 +242,8 @@ rip6_input(struct mbuf **mp, int *offp, 
>               counters = counters_enter(&ref, ip6counters);
>               counters[ip6s_delivered]--;
>               counters_leave(&ref, ip6counters);
> +
> +             return IPPROTO_DONE;
>       }
>  
>       while ((in6p = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> @@ -267,6 +271,8 @@ rip6_input(struct mbuf **mp, int *offp, 
>               }
>               in_pcbunref(in6p);
>       }
> +     rw_exit_write(&rawin6pcbtable.inpt_notify);
> +
>       return IPPROTO_DONE;
>  }
>  
> 

Reply via email to