On Tue, Oct 04, 2016 at 04:44:29PM +0200, Martin Pieuchot wrote:
> On 10/03/16 16:43, Martin Pieuchot wrote:
> > Diff below introduces a single write lock that will be used to serialize
> > access to ip_output().
> > 
> > This lock will then be split into multiple readers and writers to allow
> > multiple forwarding paths to run in parallel with each other while still
> > being serialized with the socket layer.
> > 
> > I'm currently looking for people wanting to run this diff and try to
> > break it.  In other words, your machine might panic with it, and if it
> > does, please report the panic to me so the diff can be improved.
> > 
> > I tested NFS v2 and v3 so I'm quite confident, but I might have missed
> > some obvious stuff.
> 
> Updated diff attached, including a fix for syn_cache_timer(), a problem
> reported by Chris Jackman.
> 

So far, so good, on i386 and amd64 vmm(4) VMs. Booted, did a pkg_add
upgrade, and ran cvsync.

No issues seen so far.

-ml

> Index: kern/kern_rwlock.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 kern_rwlock.c
> --- kern/kern_rwlock.c        14 Mar 2015 07:33:42 -0000      1.27
> +++ kern/kern_rwlock.c        4 Oct 2016 14:40:29 -0000
> @@ -98,6 +98,12 @@ rw_enter_read(struct rwlock *rwl)
>               membar_enter();
>  }
>  
> +#if 1
> +#include <machine/db_machdep.h>
> +#include <ddb/db_output.h>
> +#include <ddb/db_interface.h>
> +#endif
> +
>  void
>  rw_enter_write(struct rwlock *rwl)
>  {
> @@ -108,6 +114,15 @@ rw_enter_write(struct rwlock *rwl)
>               rw_enter(rwl, RW_WRITE);
>       else
>               membar_enter();
> +
> +#if 1
> +     if ((rwl == &netlock) && (splassert_ctl == 3)) {
> +             printf("ENTER::%d::", cpu_number());
> +             db_stack_trace_print(
> +                 (db_expr_t)__builtin_frame_address(1),
> +                 TRUE, 1, "", printf);
> +     }
> +#endif
>  }
>  
>  void
> @@ -129,6 +144,15 @@ rw_exit_write(struct rwlock *rwl)
>       unsigned long owner = rwl->rwl_owner;
>  
>       rw_assert_wrlock(rwl);
> +
> +#if 1
> +     if ((rwl == &netlock) && (splassert_ctl == 3)) {
> +             printf("EXIT::%d::", cpu_number());
> +             db_stack_trace_print(
> +                 (db_expr_t)__builtin_frame_address(1),
> +                 TRUE, 1, "", printf);
> +     }
> +#endif
>  
>       membar_exit();
>       if (__predict_false((owner & RWLOCK_WAIT) ||
> Index: kern/sys_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 sys_socket.c
> --- kern/sys_socket.c 5 Dec 2015 10:11:53 -0000       1.21
> +++ kern/sys_socket.c 4 Oct 2016 14:40:29 -0000
> @@ -131,8 +131,10 @@ soo_poll(struct file *fp, int events, st
>  {
>       struct socket *so = fp->f_data;
>       int revents = 0;
> -     int s = splsoftnet();
> +     int s;
>  
> +     rw_enter_write(&netlock);
> +     s = splsoftnet();
>       if (events & (POLLIN | POLLRDNORM)) {
>               if (soreadable(so))
>                       revents |= events & (POLLIN | POLLRDNORM);
> @@ -159,6 +161,7 @@ soo_poll(struct file *fp, int events, st
>               }
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (revents);
>  }
>  
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.161
> diff -u -p -r1.161 uipc_socket.c
> --- kern/uipc_socket.c        20 Sep 2016 14:27:43 -0000      1.161
> +++ kern/uipc_socket.c        4 Oct 2016 14:40:29 -0000
> @@ -123,6 +123,7 @@ socreate(int dom, struct socket **aso, i
>               return (EPROTONOSUPPORT);
>       if (prp->pr_type != type)
>               return (EPROTOTYPE);
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
>       TAILQ_INIT(&so->so_q0);
> @@ -142,9 +143,11 @@ socreate(int dom, struct socket **aso, i
>               so->so_state |= SS_NOFDREF;
>               sofree(so);
>               splx(s);
> +             rw_exit_write(&netlock);
>               return (error);
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>       *aso = so;
>       return (0);
>  }
> @@ -152,11 +155,13 @@ socreate(int dom, struct socket **aso, i
>  int
>  sobind(struct socket *so, struct mbuf *nam, struct proc *p)
>  {
> -     int s = splsoftnet();
> -     int error;
> +     int s, error;
>  
> +     rw_enter_write(&netlock);
> +     s = splsoftnet();
>       error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (error);
>  }
>  
> @@ -171,11 +176,13 @@ solisten(struct socket *so, int backlog)
>       if (isspliced(so) || issplicedback(so))
>               return (EOPNOTSUPP);
>  #endif /* SOCKET_SPLICE */
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
>           curproc);
>       if (error) {
>               splx(s);
> +             rw_exit_write(&netlock);
>               return (error);
>       }
>       if (TAILQ_FIRST(&so->so_q) == NULL)
> @@ -186,6 +193,7 @@ solisten(struct socket *so, int backlog)
>               backlog = sominconn;
>       so->so_qlimit = backlog;
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (0);
>  }
>  
> @@ -196,6 +204,7 @@ solisten(struct socket *so, int backlog)
>  void
>  sofree(struct socket *so)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
> @@ -234,9 +243,10 @@ int
>  soclose(struct socket *so)
>  {
>       struct socket *so2;
> -     int s = splsoftnet();           /* conservative */
> -     int error = 0;
> +     int s, error = 0;
>  
> +     rw_enter_write(&netlock);
> +     s = splsoftnet();               /* conservative */
>       if (so->so_options & SO_ACCEPTCONN) {
>               while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
>                       (void) soqremque(so2, 0);
> @@ -260,7 +270,7 @@ soclose(struct socket *so)
>                           (so->so_state & SS_NBIO))
>                               goto drop;
>                       while (so->so_state & SS_ISCONNECTED) {
> -                             error = tsleep(&so->so_timeo,
> +                             error = rwsleep(&so->so_timeo, &netlock,
>                                   PSOCK | PCATCH, "netcls",
>                                   so->so_linger * hz);
>                               if (error)
> @@ -281,6 +291,7 @@ discard:
>       so->so_state |= SS_NOFDREF;
>       sofree(so);
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (error);
>  }
>  
> @@ -290,6 +301,7 @@ discard:
>  int
>  soabort(struct socket *so)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
> @@ -301,6 +313,7 @@ soaccept(struct socket *so, struct mbuf 
>  {
>       int error = 0;
>  
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       if ((so->so_state & SS_NOFDREF) == 0)
> @@ -318,11 +331,11 @@ soaccept(struct socket *so, struct mbuf 
>  int
>  soconnect(struct socket *so, struct mbuf *nam)
>  {
> -     int s;
> -     int error;
> +     int s, error;
>  
>       if (so->so_options & SO_ACCEPTCONN)
>               return (EOPNOTSUPP);
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       /*
>        * If protocol is connection-based, can only connect once.
> @@ -338,18 +351,21 @@ soconnect(struct socket *so, struct mbuf
>               error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
>                   NULL, nam, NULL, curproc);
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (error);
>  }
>  
>  int
>  soconnect2(struct socket *so1, struct socket *so2)
>  {
> -     int s = splsoftnet();
> -     int error;
> +     int s, error;
>  
> +     rw_enter_write(&netlock);
> +     s = splsoftnet();
>       error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
>           (struct mbuf *)so2, NULL, curproc);
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (error);
>  }
>  
> @@ -358,14 +374,20 @@ sodisconnect(struct socket *so)
>  {
>       int error;
>  
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
> -     if ((so->so_state & SS_ISCONNECTED) == 0)
> -             return (ENOTCONN);
> -     if (so->so_state & SS_ISDISCONNECTING)
> -             return (EALREADY);
> +     if ((so->so_state & SS_ISCONNECTED) == 0) {
> +             error = ENOTCONN;
> +             goto bad;
> +     }
> +     if (so->so_state & SS_ISDISCONNECTING) {
> +             error = EALREADY;
> +             goto bad;
> +     }
>       error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
>           NULL, curproc);
> +bad:
>       return (error);
>  }
>  
> @@ -426,21 +448,21 @@ sosend(struct socket *so, struct mbuf *a
>                           (sizeof(struct file *) / sizeof(int)));
>       }
>  
> -#define      snderr(errno)   { error = errno; splx(s); goto release; }
> +#define      snderr(e) { error = e; splx(s); rw_exit_write(&netlock); goto release; }
>  
>  restart:
>       if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
>               goto out;
>       so->so_state |= SS_ISSENDING;
>       do {
> +             rw_enter_write(&netlock);
>               s = splsoftnet();
>               if (so->so_state & SS_CANTSENDMORE)
>                       snderr(EPIPE);
>               if (so->so_error) {
>                       error = so->so_error;
>                       so->so_error = 0;
> -                     splx(s);
> -                     goto release;
> +                     snderr(error);
>               }
>               if ((so->so_state & SS_ISCONNECTED) == 0) {
>                       if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
> @@ -465,11 +487,13 @@ restart:
>                       error = sbwait(&so->so_snd);
>                       so->so_state &= ~SS_ISSENDING;
>                       splx(s);
> +                     rw_exit_write(&netlock);
>                       if (error)
>                               goto out;
>                       goto restart;
>               }
>               splx(s);
> +             rw_exit_write(&netlock);
>               space -= clen;
>               do {
>                       if (uio == NULL) {
> @@ -489,6 +513,7 @@ restart:
>                               if (flags & MSG_EOR)
>                                       top->m_flags |= M_EOR;
>                       }
> +                     rw_enter_write(&netlock);
>                       s = splsoftnet();               /* XXX */
>                       if (resid == 0)
>                               so->so_state &= ~SS_ISSENDING;
> @@ -496,6 +521,7 @@ restart:
>                           (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
>                           top, addr, control, curproc);
>                       splx(s);
> +                     rw_exit_write(&netlock);
>                       clen = 0;
>                       control = NULL;
>                       top = NULL;
> @@ -625,8 +651,8 @@ sbsync(struct sockbuf *sb, struct mbuf *
>   * must begin with an address if the protocol so specifies,
>   * followed by an optional mbuf or mbufs containing ancillary data,
>   * and then zero or more mbufs of data.
> - * In order to avoid blocking network interrupts for the entire time here,
> - * we splx() while doing the actual copy to user space.
> + * In order to avoid blocking network for the entire time here, we splx()
> + * and release ``netlock'' while doing the actual copy to user space.
>   * Although the sockbuf is locked, new data may still be appended,
>   * and thus we must maintain consistency of the sockbuf during that time.
>   *
> @@ -680,6 +706,8 @@ bad:
>  restart:
>       if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
>               return (error);
> +
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>  
>       m = so->so_rcv.sb_mb;
> @@ -746,6 +774,7 @@ restart:
>               sbunlock(&so->so_rcv);
>               error = sbwait(&so->so_rcv);
>               splx(s);
> +             rw_exit_write(&netlock);
>               if (error)
>                       return (error);
>               goto restart;
> @@ -880,7 +909,9 @@ dontblock:
>                       SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
>                       resid = uio->uio_resid;
>                       splx(s);
> +                     rw_exit_write(&netlock);
>                       uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
> +                     rw_enter_write(&netlock);
>                       s = splsoftnet();
>                       if (uio_error)
>                               uio->uio_resid = resid - len;
> @@ -964,6 +995,7 @@ dontblock:
>                       if (error) {
>                               sbunlock(&so->so_rcv);
>                               splx(s);
> +                             rw_exit_write(&netlock);
>                               return (0);
>                       }
>                       if ((m = so->so_rcv.sb_mb) != NULL)
> @@ -1000,6 +1032,7 @@ dontblock:
>           (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
>               sbunlock(&so->so_rcv);
>               splx(s);
> +             rw_exit_write(&netlock);
>               goto restart;
>       }
>  
> @@ -1011,6 +1044,7 @@ dontblock:
>  release:
>       sbunlock(&so->so_rcv);
>       splx(s);
> +     rw_exit_write(&netlock);
>       return (error);
>  }
>  
> @@ -1020,6 +1054,7 @@ soshutdown(struct socket *so, int how)
>       struct protosw *pr = so->so_proto;
>       int s, error = 0;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       switch (how) {
>       case SHUT_RD:
> @@ -1037,6 +1072,8 @@ soshutdown(struct socket *so, int how)
>               break;
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
> +
>       return (error);
>  }
>  
> @@ -1050,6 +1087,7 @@ sorflush(struct socket *so)
>  
>       sb->sb_flags |= SB_NOINTR;
>       (void) sblock(sb, M_WAITOK);
> +     /* XXXSMP */
>       s = splnet();
>       socantrcvmore(so);
>       sbunlock(sb);
> @@ -1103,10 +1141,12 @@ sosplice(struct socket *so, int fd, off_
>               if ((error = sblock(&so->so_rcv,
>                   (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0)
>                       return (error);
> +             rw_enter_write(&netlock);
>               s = splsoftnet();
>               if (so->so_sp->ssp_socket)
>                       sounsplice(so, so->so_sp->ssp_socket, 1);
>               splx(s);
> +             rw_exit_write(&netlock);
>               sbunlock(&so->so_rcv);
>               return (0);
>       }
> @@ -1135,6 +1175,7 @@ sosplice(struct socket *so, int fd, off_
>               FRELE(fp, curproc);
>               return (error);
>       }
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>  
>       if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
> @@ -1177,6 +1218,7 @@ sosplice(struct socket *so, int fd, off_
>  
>   release:
>       splx(s);
> +     rw_exit_write(&netlock);
>       sbunlock(&sosp->so_snd);
>       sbunlock(&so->so_rcv);
>       FRELE(fp, curproc);
> @@ -1186,6 +1228,7 @@ sosplice(struct socket *so, int fd, off_
>  void
>  sounsplice(struct socket *so, struct socket *sosp, int wakeup)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       task_del(sosplice_taskq, &so->so_splicetask);
> @@ -1203,12 +1246,14 @@ soidle(void *arg)
>       struct socket *so = arg;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
>               so->so_error = ETIMEDOUT;
>               sounsplice(so, so->so_sp->ssp_socket, 1);
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  void
> @@ -1217,6 +1262,7 @@ sotask(void *arg)
>       struct socket *so = arg;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       if (so->so_rcv.sb_flagsintr & SB_SPLICE) {
>               /*
> @@ -1227,6 +1273,7 @@ sotask(void *arg)
>               somove(so, M_DONTWAIT);
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>  
>       /* Avoid user land starvation. */
>       yield();
> @@ -1248,6 +1295,7 @@ somove(struct socket *so, int wait)
>       int              error = 0, maxreached = 0;
>       short            state;
>  
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>   nextpkt:
> @@ -1510,6 +1558,7 @@ somove(struct socket *so, int wait)
>  void
>  sorwakeup(struct socket *so)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>  #ifdef SOCKET_SPLICE
> @@ -1531,13 +1580,17 @@ sorwakeup(struct socket *so)
>               return;
>  #endif
>       sowakeup(so, &so->so_rcv);
> -     if (so->so_upcall)
> +     if (so->so_upcall) {
> +             rw_exit_write(&netlock);
>               (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
> +             rw_enter_write(&netlock);
> +     }
>  }
>  
>  void
>  sowwakeup(struct socket *so)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>  #ifdef SOCKET_SPLICE
> @@ -1884,7 +1937,8 @@ soo_kqfilter(struct file *fp, struct kno
>  {
>       struct socket *so = kn->kn_fp->f_data;
>       struct sockbuf *sb;
> -     int s;
> +
> +     KERNEL_ASSERT_LOCKED();
>  
>       switch (kn->kn_filter) {
>       case EVFILT_READ:
> @@ -1902,10 +1956,9 @@ soo_kqfilter(struct file *fp, struct kno
>               return (EINVAL);
>       }
>  
> -     s = splnet();
>       SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
>       sb->sb_flags |= SB_KNOTE;
> -     splx(s);
> +
>       return (0);
>  }
>  
> @@ -1913,12 +1966,12 @@ void
>  filt_sordetach(struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
> -     int s = splnet();
> +
> +     KERNEL_ASSERT_LOCKED();
>  
>       SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
>       if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
>               so->so_rcv.sb_flags &= ~SB_KNOTE;
> -     splx(s);
>  }
>  
>  int
> @@ -1947,12 +2000,12 @@ void
>  filt_sowdetach(struct knote *kn)
>  {
>       struct socket *so = kn->kn_fp->f_data;
> -     int s = splnet();
> +
> +     KERNEL_ASSERT_LOCKED();
>  
>       SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
>       if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
>               so->so_snd.sb_flags &= ~SB_KNOTE;
> -     splx(s);
>  }
>  
>  int
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.65
> diff -u -p -r1.65 uipc_socket2.c
> --- kern/uipc_socket2.c       2 Sep 2016 13:28:21 -0000       1.65
> +++ kern/uipc_socket2.c       4 Oct 2016 14:40:29 -0000
> @@ -138,8 +138,6 @@ soisdisconnected(struct socket *so)
>   * then we allocate a new structure, properly linked into the
>   * data structure of the original socket, and return this.
>   * Connstatus may be 0 or SS_ISCONNECTED.
> - *
> - * Must be called at splsoftnet()
>   */
>  struct socket *
>  sonewconn(struct socket *head, int connstatus)
> @@ -147,6 +145,7 @@ sonewconn(struct socket *head, int conns
>       struct socket *so;
>       int soqueue = connstatus ? 1 : 0;
>  
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       if (mclpools[0].pr_nout > mclpools[0].pr_hardlimit * 95 / 100)
> @@ -276,10 +275,11 @@ socantrcvmore(struct socket *so)
>  int
>  sbwait(struct sockbuf *sb)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       sb->sb_flagsintr |= SB_WAIT;
> -     return (tsleep(&sb->sb_cc,
> +     return (rwsleep(&sb->sb_cc, &netlock,
>           (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "netio",
>           sb->sb_timeo));
>  }
> @@ -317,7 +317,8 @@ sbunlock(struct sockbuf *sb)
>  void
>  sowakeup(struct socket *so, struct sockbuf *sb)
>  {
> -     int s = splsoftnet();
> +     rw_assert_wrlock(&netlock);
> +     splassert(IPL_SOFTNET);
>  
>       selwakeup(&sb->sb_sel);
>       sb->sb_flagsintr &= ~SB_SEL;
> @@ -325,7 +326,7 @@ sowakeup(struct socket *so, struct sockb
>               sb->sb_flagsintr &= ~SB_WAIT;
>               wakeup(&sb->sb_cc);
>       }
> -     splx(s);
> +
>       if (so->so_state & SS_ASYNC)
>               csignal(so->so_pgid, SIGIO, so->so_siguid, so->so_sigeuid);
>  }
> Index: kern/uipc_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 uipc_syscalls.c
> --- kern/uipc_syscalls.c      9 Aug 2016 02:25:35 -0000       1.133
> +++ kern/uipc_syscalls.c      4 Oct 2016 14:40:29 -0000
> @@ -250,6 +250,7 @@ doaccept(struct proc *p, int sock, struc
>       if ((error = getsock(p, sock, &fp)) != 0)
>               return (error);
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       headfp = fp;
>       head = fp->f_data;
> @@ -275,7 +276,8 @@ redo:
>                       head->so_error = ECONNABORTED;
>                       break;
>               }
> -             error = tsleep(&head->so_timeo, PSOCK | PCATCH, "netcon", 0);
> +             error = rwsleep(&head->so_timeo, &netlock, PSOCK | PCATCH,
> +                 "netcon", 0);
>               if (error) {
>                       goto bad;
>               }
> @@ -352,6 +354,7 @@ redo:
>       m_freem(nam);
>  bad:
>       splx(s);
> +     rw_exit_write(&netlock);
>       FRELE(headfp, p);
>       return (error);
>  }
> @@ -406,9 +409,11 @@ sys_connect(struct proc *p, void *v, reg
>               m_freem(nam);
>               return (EINPROGRESS);
>       }
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
> -             error = tsleep(&so->so_timeo, PSOCK | PCATCH, "netcon2", 0);
> +             error = rwsleep(&so->so_timeo, &netlock, PSOCK | PCATCH,
> +                 "netcon2", 0);
>               if (error) {
>                       if (error == EINTR || error == ERESTART)
>                               interrupted = 1;
> @@ -420,6 +425,7 @@ sys_connect(struct proc *p, void *v, reg
>               so->so_error = 0;
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>  bad:
>       if (!interrupted)
>               so->so_state &= ~SS_ISCONNECTING;
> Index: kern/uipc_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 uipc_usrreq.c
> --- kern/uipc_usrreq.c        26 Aug 2016 07:12:30 -0000      1.102
> +++ kern/uipc_usrreq.c        4 Oct 2016 14:40:29 -0000
> @@ -131,7 +131,10 @@ uipc_usrreq(struct socket *so, int req, 
>               break;
>  
>       case PRU_BIND:
> +             rw_assert_wrlock(&netlock);
> +             rw_exit_write(&netlock);
>               error = unp_bind(unp, nam, p);
> +             rw_enter_write(&netlock);
>               break;
>  
>       case PRU_LISTEN:
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.452
> diff -u -p -r1.452 if.c
> --- net/if.c  3 Oct 2016 12:26:13 -0000       1.452
> +++ net/if.c  4 Oct 2016 14:40:29 -0000
> @@ -163,7 +163,13 @@ void     if_netisr(void *);
>  void ifa_print_all(void);
>  #endif
>  
> -void if_start_locked(struct ifnet *ifp);
> +void if_start_locked(struct ifnet *);
> +int  if_ioctl_locked(struct socket *, u_long, caddr_t, struct proc *);
> +
> +/*
> + * Network lock: serialize socket operations.
> + */
> +struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
>  
>  /*
>   * interface index map
> @@ -836,10 +842,16 @@ if_netisr(void *unused)
>       int s;
>  
>       KERNEL_LOCK();
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>  
>       while ((n = netisr) != 0) {
> -             sched_pause();
> +             /* Like sched_pause() but with a rwlock dance. */
> +             if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
> +                     rw_exit_write(&netlock);
> +                     yield();
> +                     rw_enter_write(&netlock);
> +             }
>  
>               atomic_clearbits_int(&netisr, n);
>  
> @@ -878,6 +890,7 @@ if_netisr(void *unused)
>  #endif
>  
>       splx(s);
> +     rw_exit_write(&netlock);
>       KERNEL_UNLOCK();
>  }
>  
> @@ -1435,6 +1448,7 @@ if_downall(void)
>       struct ifnet *ifp;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splnet();
>       TAILQ_FOREACH(ifp, &ifnet, if_list) {
>               if ((ifp->if_flags & IFF_UP) == 0)
> @@ -1449,6 +1463,7 @@ if_downall(void)
>               }
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  /*
> @@ -1508,9 +1523,11 @@ if_linkstate_task(void *xifidx)
>       if (ifp == NULL)
>               return;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       if_linkstate(ifp);
>       splx(s);
> +     rw_exit_write(&netlock);
>  
>       if_put(ifp);
>  }
> @@ -1518,6 +1535,7 @@ if_linkstate_task(void *xifidx)
>  void
>  if_linkstate(struct ifnet *ifp)
>  {
> +     rw_assert_wrlock(&netlock);
>       splsoftassert(IPL_SOFTNET);
>  
>       rt_ifmsg(ifp);
> @@ -1708,6 +1726,18 @@ if_setrdomain(struct ifnet *ifp, int rdo
>   */
>  int
>  ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
> +{
> +     int error;
> +
> +     rw_enter_write(&netlock);
> +     error = if_ioctl_locked(so, cmd, data, p);
> +     rw_exit_write(&netlock);
> +
> +     return (error);
> +}
> +
> +int
> +if_ioctl_locked(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
>  {
>       struct ifnet *ifp;
>       struct ifreq *ifr;
> Index: net/rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.207
> diff -u -p -r1.207 rtsock.c
> --- net/rtsock.c      27 Sep 2016 18:41:11 -0000      1.207
> +++ net/rtsock.c      4 Oct 2016 14:40:29 -0000
> @@ -296,6 +296,7 @@ route_ctloutput(int op, struct socket *s
>       return (error);
>  }
>  
> +/* XXXSMP */
>  void
>  rt_senddesync(void *data)
>  {
> Index: netinet/ip_carp.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_carp.c,v
> retrieving revision 1.294
> diff -u -p -r1.294 ip_carp.c
> --- netinet/ip_carp.c 4 Oct 2016 13:54:32 -0000       1.294
> +++ netinet/ip_carp.c 4 Oct 2016 14:40:29 -0000
> @@ -1045,6 +1045,7 @@ carp_send_ad(void *v)
>               return;
>       }
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>  
>       /* bow out if we've gone to backup (the carp interface is going down) */
> @@ -1247,6 +1248,7 @@ carp_send_ad(void *v)
>  retry_later:
>       sc->cur_vhe = NULL;
>       splx(s);
> +     rw_exit_write(&netlock);
>       if (advbase != 255 || advskew != 255)
>               timeout_add(&vhe->ad_tmo, tvtohz(&tv));
>  }
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.282
> diff -u -p -r1.282 ip_input.c
> --- netinet/ip_input.c        22 Sep 2016 10:12:25 -0000      1.282
> +++ netinet/ip_input.c        4 Oct 2016 14:40:29 -0000
> @@ -1755,12 +1755,17 @@ ip_send_dispatch(void *xmq)
>       int s;
>  
>       mq_delist(mq, &ml);
> +     if (ml_empty(&ml))
> +             return;
> +
>       KERNEL_LOCK();
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>       KERNEL_UNLOCK();
>  }
>  
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.327
> diff -u -p -r1.327 ip_output.c
> --- netinet/ip_output.c       4 Sep 2016 17:18:56 -0000       1.327
> +++ netinet/ip_output.c       4 Oct 2016 14:40:29 -0000
> @@ -109,6 +109,9 @@ ip_output(struct mbuf *m0, struct mbuf *
>       int rv;
>  #endif
>  
> +     /* Make sure this thread holds the correct lock. */
> +     KASSERT(rw_status(&netlock) == RW_WRITE);
> +
>  #ifdef IPSEC
>       if (inp && (inp->inp_flags & INP_IPV6) != 0)
>               panic("ip_output: IPv6 pcb is passed");
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 tcp_timer.c
> --- netinet/tcp_timer.c       24 Sep 2016 14:51:37 -0000      1.50
> +++ netinet/tcp_timer.c       4 Oct 2016 14:40:29 -0000
> @@ -112,15 +112,15 @@ tcp_delack(void *arg)
>        * for whatever reason, it will restart the delayed
>        * ACK callout.
>        */
> -
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>       tp->t_flags |= TF_ACKNOW;
>       (void) tcp_output(tp);
> + out:
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  /*
> @@ -193,11 +193,10 @@ tcp_timer_rexmt(void *arg)
>       uint32_t rto;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>  
>       if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
>           SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> @@ -224,8 +223,7 @@ tcp_timer_rexmt(void *arg)
>               sin.sin_addr = tp->t_inpcb->inp_faddr;
>               in_pcbnotifyall(&tcbtable, sintosa(&sin),
>                   tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
> -             splx(s);
> -             return;
> +             goto out;
>       }
>  
>  #ifdef TCP_SACK
> @@ -377,6 +375,7 @@ tcp_timer_rexmt(void *arg)
>  
>   out:
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  void
> @@ -386,11 +385,11 @@ tcp_timer_persist(void *arg)
>       uint32_t rto;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       if ((tp->t_flags & TF_DEAD) ||
>              TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
> -             splx(s);
> -             return;
> +             goto out;
>       }
>       tcpstat.tcps_persisttimeo++;
>       /*
> @@ -416,6 +415,7 @@ tcp_timer_persist(void *arg)
>       tp->t_force = 0;
>   out:
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  void
> @@ -424,11 +424,10 @@ tcp_timer_keep(void *arg)
>       struct tcpcb *tp = arg;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>  
>       tcpstat.tcps_keeptimeo++;
>       if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> @@ -457,8 +456,9 @@ tcp_timer_keep(void *arg)
>               TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
>       } else
>               TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> -
> + out:
>       splx(s);
> +     rw_exit_write(&netlock);
>       return;
>  
>   dropit:
> @@ -466,6 +466,7 @@ tcp_timer_keep(void *arg)
>       tp = tcp_drop(tp, ETIMEDOUT);
>  
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  void
> @@ -474,11 +475,10 @@ tcp_timer_2msl(void *arg)
>       struct tcpcb *tp = arg;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
> -     if (tp->t_flags & TF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     if (tp->t_flags & TF_DEAD)
> +             goto out;
>  
>  #ifdef TCP_SACK
>       tcp_timer_freesack(tp);
> @@ -490,5 +490,7 @@ tcp_timer_2msl(void *arg)
>       else
>               tp = tcp_close(tp);
>  
> + out:
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
> Index: netinet/tcp_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.329
> diff -u -p -r1.329 tcp_input.c
> --- netinet/tcp_input.c       4 Oct 2016 13:56:50 -0000       1.329
> +++ netinet/tcp_input.c       4 Oct 2016 14:40:29 -0000
> @@ -3522,11 +3522,10 @@ syn_cache_timer(void *arg)
>       struct syn_cache *sc = arg;
>       int s;
>  
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
> -     if (sc->sc_flags & SCF_DEAD) {
> -             splx(s);
> -             return;
> -     }
> +     if (sc->sc_flags & SCF_DEAD)
> +             goto out;
>  
>       if (__predict_false(sc->sc_rxtshift == TCP_MAXRXTSHIFT)) {
>               /* Drop it -- too many retransmissions. */
> @@ -3549,7 +3548,9 @@ syn_cache_timer(void *arg)
>       sc->sc_rxtshift++;
>       SYN_CACHE_TIMER_ARM(sc);
>  
> + out:
>       splx(s);
> +     rw_exit_write(&netlock);
>       return;
>  
>   dropit:
> @@ -3557,6 +3558,7 @@ syn_cache_timer(void *arg)
>       syn_cache_rm(sc);
>       syn_cache_put(sc);
>       splx(s);
> +     rw_exit_write(&netlock);
>  }
>  
>  void
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 ip6_input.c
> --- netinet6/ip6_input.c      24 Aug 2016 09:41:12 -0000      1.168
> +++ netinet6/ip6_input.c      4 Oct 2016 14:40:29 -0000
> @@ -1429,12 +1429,17 @@ ip6_send_dispatch(void *xmq)
>       int s;
>  
>       mq_delist(mq, &ml);
> +     if (ml_empty(&ml))
> +             return;
> +
>       KERNEL_LOCK();
> +     rw_enter_write(&netlock);
>       s = splsoftnet();
>       while ((m = ml_dequeue(&ml)) != NULL) {
>               ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL);
>       }
>       splx(s);
> +     rw_exit_write(&netlock);
>       KERNEL_UNLOCK();
>  }
>  
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.119
> diff -u -p -r1.119 systm.h
> --- sys/systm.h       24 Sep 2016 18:35:52 -0000      1.119
> +++ sys/systm.h       4 Oct 2016 14:40:29 -0000
> @@ -290,6 +290,11 @@ struct uio;
>  int  uiomove(void *, size_t, struct uio *);
>  
>  #if defined(_KERNEL)
> +/*
> + * Network lock: serialize socket operations.
> + */
> +extern struct rwlock netlock;
> +
>  __returns_twice int  setjmp(label_t *);
>  __dead void  longjmp(label_t *);
>  #endif

Reply via email to