On Mon, Jun 04, 2018 at 03:56:02PM +0200, Martin Pieuchot wrote:
> Diff below adds a 'struct socket *' argument to sounlock() in order to
> prepare the stack for per-socket locks.
> 
> That means sofree() will now unlock a given socket before freeing it.
> But since we do not want to not release the NET_LOCK() when processing
> incoming TCP packets, in_pcbdetach() needs a special treatment.  That's
> also true for unp_drop() as long as Unix sockets will required the
> KERNEL_LOCK().
> 
> This is on top of my previous diff to reduce the number of sofree().
> 
> Comments?  Oks?

I am working on the inpcb layer and also came to the conclusion
that this is necessary.  I have added a mutex to struct inpcb that
needs some control from the socket layer.  My idea was to add
PRU_LOCK and PRU_UNLOCK.  solock() and sounlock() would have to
call them.  Adding the socket to the sounlock() parameter is the
right thing.

The passing s to sofree() and unlock there in looks odd, but may
be a necessary intermediate step.  Basically I have to solve the
same problem in in_pcbdetach().

My diff is not ready yet, go ahead, OK bluhm@

> diff --git sys/kern/sys_socket.c sys/kern/sys_socket.c
> index 916c33a0c1a..a754a7b2698 100644
> --- sys/kern/sys_socket.c
> +++ sys/kern/sys_socket.c
> @@ -88,7 +88,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct 
> proc *p)
>                       so->so_state |= SS_NBIO;
>               else
>                       so->so_state &= ~SS_NBIO;
> -             sounlock(s);
> +             sounlock(so, s);
>               break;
>  
>       case FIOASYNC:
> @@ -102,7 +102,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, 
> struct proc *p)
>                       so->so_rcv.sb_flags &= ~SB_ASYNC;
>                       so->so_snd.sb_flags &= ~SB_ASYNC;
>               }
> -             sounlock(s);
> +             sounlock(so, s);
>               break;
>  
>       case FIONREAD:
> @@ -176,7 +176,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
>                       so->so_snd.sb_flags |= SB_SEL;
>               }
>       }
> -     sounlock(s);
> +     sounlock(so, s);
>       return (revents);
>  }
>  
> @@ -197,7 +197,7 @@ soo_stat(struct file *fp, struct stat *ub, struct proc *p)
>       ub->st_gid = so->so_egid;
>       (void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE,
>           (struct mbuf *)ub, NULL, NULL, p));
> -     sounlock(s);
> +     sounlock(so, s);
>       return (0);
>  }
>  
> diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
> index 211966c79c8..aa789d403cc 100644
> --- sys/kern/uipc_socket.c
> +++ sys/kern/uipc_socket.c
> @@ -142,11 +142,11 @@ socreate(int dom, struct socket **aso, int type, int 
> proto)
>       error = (*prp->pr_attach)(so, proto);
>       if (error) {
>               so->so_state |= SS_NOFDREF;
> -             sofree(so);
> -             sounlock(s);
> +             /* sofree() calls sounlock(). */
> +             sofree(so, s);
>               return (error);
>       }
> -     sounlock(s);
> +     sounlock(so, s);
>       *aso = so;
>       return (0);
>  }
> @@ -177,7 +177,7 @@ solisten(struct socket *so, int backlog)
>       error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
>           curproc);
>       if (error) {
> -             sounlock(s);
> +             sounlock(so, s);
>               return (error);
>       }
>       if (TAILQ_FIRST(&so->so_q) == NULL)
> @@ -187,25 +187,29 @@ solisten(struct socket *so, int backlog)
>       if (backlog < sominconn)
>               backlog = sominconn;
>       so->so_qlimit = backlog;
> -     sounlock(s);
> +     sounlock(so, s);
>       return (0);
>  }
>  
>  void
> -sofree(struct socket *so)
> +sofree(struct socket *so, int s)
>  {
>       soassertlocked(so);
>  
> -     if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
> +     if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
> +             sounlock(so, s);
>               return;
> +     }
>       if (so->so_head) {
>               /*
>                * We must not decommission a socket that's on the accept(2)
>                * queue.  If we do, then accept(2) may hang after select(2)
>                * indicated that the listening socket was ready.
>                */
> -             if (!soqremque(so, 0))
> +             if (!soqremque(so, 0)) {
> +                     sounlock(so, s);
>                       return;
> +             }
>       }
>  #ifdef SOCKET_SPLICE
>       if (so->so_sp) {
> @@ -218,6 +222,7 @@ sofree(struct socket *so)
>  #endif /* SOCKET_SPLICE */
>       sbrelease(so, &so->so_snd);
>       sorflush(so);
> +     sounlock(so, s);
>  #ifdef SOCKET_SPLICE
>       if (so->so_sp) {
>               /* Reuse splice idle, sounsplice() has been called before. */
> @@ -284,8 +289,8 @@ drop:
>  discard:
>       KASSERT((so->so_state & SS_NOFDREF) == 0);
>       so->so_state |= SS_NOFDREF;
> -     sofree(so);
> -     sounlock(s);
> +     /* sofree() calls sounlock(). */
> +     sofree(so, s);
>       return (error);
>  }
>  
> @@ -349,7 +354,7 @@ soconnect2(struct socket *so1, struct socket *so2)
>       s = solock(so1);
>       error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
>           (struct mbuf *)so2, NULL, curproc);
> -     sounlock(s);
> +     sounlock(so1, s);
>       return (error);
>  }
>  
> @@ -478,7 +483,7 @@ restart:
>                               if (flags & MSG_EOR)
>                                       top->m_flags |= M_EOR;
>                       } else {
> -                             sounlock(s);
> +                             sounlock(so, s);
>                               error = m_getuio(&top, atomic, space, uio);
>                               s = solock(so);
>                               if (error)
> @@ -507,7 +512,7 @@ release:
>       so->so_state &= ~SS_ISSENDING;
>       sbunlock(so, &so->so_snd);
>  out:
> -     sounlock(s);
> +     sounlock(so, s);
>       m_freem(top);
>       m_freem(control);
>       return (error);
> @@ -661,7 +666,7 @@ soreceive(struct socket *so, struct mbuf **paddr, struct 
> uio *uio,
>               s = solock(so);
>               error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
>                   (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
> -             sounlock(s);
> +             sounlock(so, s);
>               if (error)
>                       goto bad;
>               do {
> @@ -679,7 +684,7 @@ bad:
>       s = solock(so);
>  restart:
>       if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
> -             sounlock(s);
> +             sounlock(so, s);
>               return (error);
>       }
>  
> @@ -747,7 +752,7 @@ restart:
>               sbunlock(so, &so->so_rcv);
>               error = sbwait(so, &so->so_rcv);
>               if (error) {
> -                     sounlock(s);
> +                     sounlock(so, s);
>                       return (error);
>               }
>               goto restart;
> @@ -883,7 +888,7 @@ dontblock:
>                       SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
>                       SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
>                       resid = uio->uio_resid;
> -                     sounlock(s);
> +                     sounlock(so, s);
>                       uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
>                       s = solock(so);
>                       if (uio_error)
> @@ -967,7 +972,7 @@ dontblock:
>                       error = sbwait(so, &so->so_rcv);
>                       if (error) {
>                               sbunlock(so, &so->so_rcv);
> -                             sounlock(s);
> +                             sounlock(so, s);
>                               return (0);
>                       }
>                       if ((m = so->so_rcv.sb_mb) != NULL)
> @@ -1013,7 +1018,7 @@ dontblock:
>               *flagsp |= flags;
>  release:
>       sbunlock(so, &so->so_rcv);
> -     sounlock(s);
> +     sounlock(so, s);
>       return (error);
>  }
>  
> @@ -1039,7 +1044,7 @@ soshutdown(struct socket *so, int how)
>               error = EINVAL;
>               break;
>       }
> -     sounlock(s);
> +     sounlock(so, s);
>  
>       return (error);
>  }
> @@ -1218,7 +1223,7 @@ soidle(void *arg)
>               so->so_error = ETIMEDOUT;
>               sounsplice(so, so->so_sp->ssp_socket, 1);
>       }
> -     sounlock(s);
> +     sounlock(so, s);
>  }
>  
>  void
> @@ -1236,7 +1241,7 @@ sotask(void *arg)
>                */
>               somove(so, M_DONTWAIT);
>       }
> -     sounlock(s);
> +     sounlock(so, s);
>  
>       /* Avoid user land starvation. */
>       yield();
> diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
> index 8bb11fd97a1..0cb8b6dc98f 100644
> --- sys/kern/uipc_socket2.c
> +++ sys/kern/uipc_socket2.c
> @@ -277,25 +277,38 @@ solock(struct socket *so)
>  {
>       int s = 0;
>  
> -     if ((so->so_proto->pr_domain->dom_family != PF_UNIX) &&
> -         (so->so_proto->pr_domain->dom_family != PF_ROUTE) &&
> -         (so->so_proto->pr_domain->dom_family != PF_KEY))
> +     switch (so->so_proto->pr_domain->dom_family) {
> +     case PF_INET:
> +     case PF_INET6:
> +             s = -42;
>               NET_LOCK();
> -     else {
> +             break;
> +     case PF_UNIX:
> +     case PF_ROUTE:
> +     case PF_KEY:
> +     default:
>               KERNEL_LOCK();
> -             s = -42;
> +             break;
>       }
>  
>       return (s);
>  }
>  
>  void
> -sounlock(int s)
> +sounlock(struct socket *so, int s)
>  {
> -     if (s != -42)
> -             NET_UNLOCK();
> -     else {
> +     switch (so->so_proto->pr_domain->dom_family) {
> +     case PF_INET:
> +     case PF_INET6:
> +             if (s == -42)
> +                     NET_UNLOCK();
> +             break;
> +     case PF_UNIX:
> +     case PF_ROUTE:
> +     case PF_KEY:
> +     default:
>               KERNEL_UNLOCK();
> +             break;
>       }
>  }
>  
> diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
> index 1c23bb59091..a6a6aee173d 100644
> --- sys/kern/uipc_syscalls.c
> +++ sys/kern/uipc_syscalls.c
> @@ -209,7 +209,7 @@ sys_bind(struct proc *p, void *v, register_t *retval)
>  #endif
>       s = solock(so);
>       error = sobind(so, nam, p);
> -     sounlock(s);
> +     sounlock(so, s);
>       m_freem(nam);
>  out:
>       FRELE(fp, p);
> @@ -351,7 +351,7 @@ out:
>                       so->so_state |= SS_NBIO;
>               else
>                       so->so_state &= ~SS_NBIO;
> -             sounlock(s);
> +             sounlock(head, s);
>               fdplock(fdp);
>               fp->f_data = so;
>               fdinsert(fdp, tmpfd, cloexec, fp);
> @@ -359,7 +359,7 @@ out:
>               FRELE(fp, p);
>               *retval = tmpfd;
>       } else {
> -             sounlock(s);
> +             sounlock(head, s);
>               fdplock(fdp);
>               fdremove(fdp, tmpfd);
>               closef(fp, p);
> @@ -437,7 +437,7 @@ bad:
>       if (!interrupted)
>               so->so_state &= ~SS_ISCONNECTING;
>  out:
> -     sounlock(s);
> +     sounlock(so, s);
>       FRELE(fp, p);
>       m_freem(nam);
>       if (error == ERESTART)
> @@ -1000,7 +1000,7 @@ sys_setsockopt(struct proc *p, void *v, register_t 
> *retval)
>       so = fp->f_data;
>       s = solock(so);
>       error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
> -     sounlock(s);
> +     sounlock(so, s);
>  bad:
>       m_freem(m);
>       FRELE(fp, p);
> @@ -1039,7 +1039,7 @@ sys_getsockopt(struct proc *p, void *v, register_t 
> *retval)
>       so = fp->f_data;
>       s = solock(so);
>       error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), m);
> -     sounlock(s);
> +     sounlock(so, s);
>       if (error == 0 && SCARG(uap, val) && valsize && m != NULL) {
>               if (valsize > m->m_len)
>                       valsize = m->m_len;
> @@ -1083,7 +1083,7 @@ sys_getsockname(struct proc *p, void *v, register_t 
> *retval)
>       m = m_getclr(M_WAIT, MT_SONAME);
>       s = solock(so);
>       error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0, p);
> -     sounlock(s);
> +     sounlock(so, s);
>       if (error)
>               goto bad;
>       error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
> @@ -1126,7 +1126,7 @@ sys_getpeername(struct proc *p, void *v, register_t 
> *retval)
>       m = m_getclr(M_WAIT, MT_SONAME);
>       s = solock(so);
>       error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0, p);
> -     sounlock(s);
> +     sounlock(so, s);
>       if (error)
>               goto bad;
>       error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
> diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
> index 5d95208adc6..81a64a8554a 100644
> --- sys/kern/uipc_usrreq.c
> +++ sys/kern/uipc_usrreq.c
> @@ -612,11 +612,19 @@ unp_drop(struct unpcb *unp, int errno)
>  {
>       struct socket *so = unp->unp_socket;
>  
> +     KERNEL_ASSERT_LOCKED();
> +
>       so->so_error = errno;
>       unp_disconnect(unp);
>       if (so->so_head) {
>               so->so_pcb = NULL;
> -             sofree(so);
> +             /*
> +              * sofree() releases the socket lock, so we need to
> +              * grab it beforehand as long as Unix sockets rely on
> +              * the KERNEL_LOCK();
> +              */
> +             KERNEL_LOCK();
> +             sofree(so, 0);
>               m_freem(unp->unp_addr);
>               free(unp, M_PCB, sizeof *unp);
>       }
> diff --git sys/miscfs/fifofs/fifo_vnops.c sys/miscfs/fifofs/fifo_vnops.c
> index 03a5677a05d..472bfd408a0 100644
> --- sys/miscfs/fifofs/fifo_vnops.c
> +++ sys/miscfs/fifofs/fifo_vnops.c
> @@ -170,7 +170,7 @@ fifo_open(void *v)
>               fip->fi_writers++;
>               if ((ap->a_mode & O_NONBLOCK) && fip->fi_readers == 0) {
>                       error = ENXIO;
> -                     sounlock(s);
> +                     sounlock(wso, s);
>                       goto bad;
>               }
>               if (fip->fi_writers == 1) {
> @@ -179,7 +179,7 @@ fifo_open(void *v)
>                               wakeup(&fip->fi_readers);
>               }
>       }
> -     sounlock(s);
> +     sounlock(wso, s);
>       if ((ap->a_mode & O_NONBLOCK) == 0) {
>               if ((ap->a_mode & FREAD) && fip->fi_writers == 0) {
>                       VOP_UNLOCK(vp);
> @@ -334,7 +334,7 @@ fifo_poll(void *v)
>                       wso->so_snd.sb_flags |= SB_SEL;
>               }
>       }
> -     sounlock(s);
> +     sounlock(rso, s);
>       return (revents);
>  }
>  
> @@ -369,7 +369,7 @@ fifo_close(void *v)
>  
>                       s = solock(wso);
>                       socantsendmore(wso);
> -                     sounlock(s);
> +                     sounlock(wso, s);
>               }
>       }
>       if (ap->a_fflag & FWRITE) {
> @@ -380,7 +380,7 @@ fifo_close(void *v)
>                       /* SS_ISDISCONNECTED will result in POLLHUP */
>                       rso->so_state |= SS_ISDISCONNECTED;
>                       socantrcvmore(rso);
> -                     sounlock(s);
> +                     sounlock(rso, s);
>               }
>       }
>       if (fip->fi_readers == 0 && fip->fi_writers == 0) {
> diff --git sys/net/bfd.c sys/net/bfd.c
> index e3e557e0d37..8bcfb305c99 100644
> --- sys/net/bfd.c
> +++ sys/net/bfd.c
> @@ -611,7 +611,7 @@ bfd_sender(struct bfd_config *bfd, unsigned int port)
>  
>       s = solock(so);
>       error = soconnect(so, m);
> -     sounlock(s);
> +     sounlock(so, s);
>       if (error && error != ECONNREFUSED) {
>               printf("%s: soconnect error %d\n",
>                   __func__, error);
> diff --git sys/net/if_pflow.c sys/net/if_pflow.c
> index 9ba382d5069..1b1c2a4e33e 100644
> --- sys/net/if_pflow.c
> +++ sys/net/if_pflow.c
> @@ -442,7 +442,7 @@ pflow_set(struct pflow_softc *sc, struct pflowreq *pflowr)
>  
>                               s = solock(so);
>                               error = sobind(so, m, p);
> -                             sounlock(s);
> +                             sounlock(so, s);
>                               m_freem(m);
>                               if (error) {
>                                       soclose(so);
> diff --git sys/netinet/in_pcb.c sys/netinet/in_pcb.c
> index 413f9de9df5..62bb9af000c 100644
> --- sys/netinet/in_pcb.c
> +++ sys/netinet/in_pcb.c
> @@ -584,8 +584,13 @@ in_pcbdetach(struct inpcb *inp)
>  
>       NET_ASSERT_LOCKED();
>  
> -     so->so_pcb = 0;
> -     sofree(so);
> +     so->so_pcb = NULL;
> +     /*
> +      * As long as the NET_LOCK() is the default lock for Internet
> +      * sockets, do not release it to not introduce new sleeping
> +      * points.
> +      */
> +     sofree(so, 0);
>       m_freem(inp->inp_options);
>       if (inp->inp_route.ro_rt) {
>               rtfree(inp->inp_route.ro_rt);
> diff --git sys/nfs/krpc_subr.c sys/nfs/krpc_subr.c
> index 346ff9ec989..e487867db1b 100644
> --- sys/nfs/krpc_subr.c
> +++ sys/nfs/krpc_subr.c
> @@ -241,7 +241,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers, 
> u_int func,
>       m->m_len = sizeof(tv);
>       s = solock(so);
>       error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m);
> -     sounlock(s);
> +     sounlock(so, s);
>       m_freem(m);
>       if (error)
>               goto out;
> @@ -257,7 +257,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers, 
> u_int func,
>               *on = 1;
>               s = solock(so);
>               error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m);
> -             sounlock(s);
> +             sounlock(so, s);
>               m_freem(m);
>               if (error)
>                       goto out;
> @@ -274,7 +274,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers, 
> u_int func,
>       *ip = IP_PORTRANGE_LOW;
>       s = solock(so);
>       error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
> -     sounlock(s);
> +     sounlock(so, s);
>       m_freem(mopt);
>       if (error)
>               goto out;
> @@ -288,7 +288,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers, 
> u_int func,
>       sin->sin_port = htons(0);
>       s = solock(so);
>       error = sobind(so, m, &proc0);
> -     sounlock(s);
> +     sounlock(so, s);
>       m_freem(m);
>       if (error) {
>               printf("bind failed\n");
> @@ -301,7 +301,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers, 
> u_int func,
>       *ip = IP_PORTRANGE_DEFAULT;
>       s = solock(so);
>       error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
> -     sounlock(s);
> +     sounlock(so, s);
>       m_freem(mopt);
>       if (error)
>               goto out;
> diff --git sys/nfs/nfs_socket.c sys/nfs/nfs_socket.c
> index 383db1cb930..0119f135ef2 100644
> --- sys/nfs/nfs_socket.c
> +++ sys/nfs/nfs_socket.c
> @@ -365,7 +365,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
>               goto bad;
>       so->so_rcv.sb_flags |= SB_NOINTR;
>       so->so_snd.sb_flags |= SB_NOINTR;
> -     sounlock(s);
> +     sounlock(so, s);
>  
>       m_freem(mopt);
>       m_freem(nam);
> @@ -378,7 +378,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
>       return (0);
>  
>  bad:
> -     sounlock(s);
> +     sounlock(so, s);
>  
>       m_freem(mopt);
>       m_freem(nam);
> diff --git sys/nfs/nfs_syscalls.c sys/nfs/nfs_syscalls.c
> index 527a61a37de..6b501cc419f 100644
> --- sys/nfs/nfs_syscalls.c
> +++ sys/nfs/nfs_syscalls.c
> @@ -250,7 +250,7 @@ nfssvc_addsock(struct file *fp, struct mbuf *mynam)
>       s = solock(so);
>       error = soreserve(so, siz, siz); 
>       if (error) {
> -             sounlock(s);
> +             sounlock(so, s);
>               m_freem(mynam);
>               return (error);
>       }
> @@ -279,7 +279,7 @@ nfssvc_addsock(struct file *fp, struct mbuf *mynam)
>       so->so_rcv.sb_timeo = 0;
>       so->so_snd.sb_flags &= ~SB_NOINTR;
>       so->so_snd.sb_timeo = 0;
> -     sounlock(s);
> +     sounlock(so, s);
>       if (tslp)
>               slp = tslp;
>       else {
> diff --git sys/sys/socketvar.h sys/sys/socketvar.h
> index 097ae3a4ab9..ce8bbdf9f2d 100644
> --- sys/sys/socketvar.h
> +++ sys/sys/socketvar.h
> @@ -311,7 +311,7 @@ int       soconnect(struct socket *so, struct mbuf *nam);
>  int  soconnect2(struct socket *so1, struct socket *so2);
>  int  socreate(int dom, struct socket **aso, int type, int proto);
>  int  sodisconnect(struct socket *so);
> -void sofree(struct socket *so);
> +void sofree(struct socket *so, int);
>  int  sogetopt(struct socket *so, int level, int optname, struct mbuf *m);
>  void sohasoutofband(struct socket *so);
>  void soisconnected(struct socket *so);
> @@ -338,7 +338,7 @@ int       sockargs(struct mbuf **, const void *, size_t, 
> int);
>  
>  int  sosleep(struct socket *, void *, int, const char *, int);
>  int  solock(struct socket *);
> -void sounlock(int);
> +void sounlock(struct socket *, int);
>  
>  int  sendit(struct proc *, int, struct msghdr *, int, register_t *);
>  int  recvit(struct proc *, int, struct msghdr *, caddr_t,

Reply via email to