On Sat, Aug 06, 2022 at 12:20:48AM +0300, Vitaliy Makkoveev wrote:
> > On 5 Aug 2022, at 23:50, Alexander Bluhm <[email protected]> wrote:
> > 
> > On Thu, Aug 04, 2022 at 06:07:49PM +0300, Vitaliy Makkoveev wrote:
> >> Yes please. Also I like (*pr_usrreq)() be splitted to multiple handlers.
> > 
> > With all you feedback diff looks like this now.
> > 
> 
> Did you missed sbwait() modification? I also pointed it should
> release `inp_mtx??? in soreceive() path.

You are talkung about this comment:

> > It seems sbwait() should release `inp_mtx' and keep shared netlock
> > locked.
> Both of netlock and `inp_mtx??? should be released by sbwait(). This
> is fully identical to current behaviour.

sbwait() calls sosleep_nsec().  My diff calls pr_unlock and pr_lock
there.  So it already does that, or I missunderstood something.

The condition "rw_status(&netlock) == RW_READ" to unlock the mutex
is an ugly hack, but it seems to work.

bluhm

> > Index: kern/uipc_socket.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> > retrieving revision 1.280
> > diff -u -p -r1.280 uipc_socket.c
> > --- kern/uipc_socket.c      25 Jul 2022 07:28:22 -0000      1.280
> > +++ kern/uipc_socket.c      4 Aug 2022 21:26:33 -0000
> > @@ -836,10 +836,10 @@ bad:
> >     if (mp)
> >             *mp = NULL;
> > 
> > -   solock(so);
> > +   solock_shared(so);
> > restart:
> >     if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
> > -           sounlock(so);
> > +           sounlock_shared(so);
> >             return (error);
> >     }
> > 
> > @@ -907,7 +907,7 @@ restart:
> >             sbunlock(so, &so->so_rcv);
> >             error = sbwait(so, &so->so_rcv);
> >             if (error) {
> > -                   sounlock(so);
> > +                   sounlock_shared(so);
> >                     return (error);
> >             }
> >             goto restart;
> > @@ -976,11 +976,11 @@ dontblock:
> >                     sbsync(&so->so_rcv, nextrecord);
> >                     if (controlp) {
> >                             if (pr->pr_domain->dom_externalize) {
> > -                                   sounlock(so);
> > +                                   sounlock_shared(so);
> >                                     error =
> >                                         (*pr->pr_domain->dom_externalize)
> >                                         (cm, controllen, flags);
> > -                                   solock(so);
> > +                                   solock_shared(so);
> >                             }
> >                             *controlp = cm;
> >                     } else {
> > @@ -1054,9 +1054,9 @@ dontblock:
> >                     SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
> >                     SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
> >                     resid = uio->uio_resid;
> > -                   sounlock(so);
> > +                   sounlock_shared(so);
> >                     uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
> > -                   solock(so);
> > +                   solock_shared(so);
> >                     if (uio_error)
> >                             uio->uio_resid = resid - len;
> >             } else
> > @@ -1140,7 +1140,7 @@ dontblock:
> >                     error = sbwait(so, &so->so_rcv);
> >                     if (error) {
> >                             sbunlock(so, &so->so_rcv);
> > -                           sounlock(so);
> > +                           sounlock_shared(so);
> >                             return (0);
> >                     }
> >                     if ((m = so->so_rcv.sb_mb) != NULL)
> > @@ -1186,7 +1186,7 @@ dontblock:
> >             *flagsp |= flags;
> > release:
> >     sbunlock(so, &so->so_rcv);
> > -   sounlock(so);
> > +   sounlock_shared(so);
> >     return (error);
> > }
> > 
> > Index: kern/uipc_socket2.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
> > retrieving revision 1.126
> > diff -u -p -r1.126 uipc_socket2.c
> > --- kern/uipc_socket2.c     25 Jul 2022 07:28:22 -0000      1.126
> > +++ kern/uipc_socket2.c     4 Aug 2022 21:45:24 -0000
> > @@ -360,6 +360,24 @@ solock(struct socket *so)
> >     }
> > }
> > 
> > +void
> > +solock_shared(struct socket *so)
> > +{
> > +   switch (so->so_proto->pr_domain->dom_family) {
> > +   case PF_INET:
> > +   case PF_INET6:
> > +           if (so->so_proto->pr_lock != NULL) {
> > +                   NET_RLOCK_IN_SYSCALL();
> > +                   (*so->so_proto->pr_lock)(so);
> > +           } else
> > +                   NET_LOCK();
> > +           break;
> > +   default:
> > +           rw_enter_write(&so->so_lock);
> > +           break;
> > +   }
> > +}
> > +
> > int
> > solock_persocket(struct socket *so)
> > {
> > @@ -403,6 +421,24 @@ sounlock(struct socket *so)
> > }
> > 
> > void
> > +sounlock_shared(struct socket *so)
> > +{
> > +   switch (so->so_proto->pr_domain->dom_family) {
> > +   case PF_INET:
> > +   case PF_INET6:
> > +           if (so->so_proto->pr_unlock != NULL) {
> > +                   (*so->so_proto->pr_unlock)(so);
> > +                   NET_RUNLOCK_IN_SYSCALL();
> > +           } else
> > +                   NET_UNLOCK();
> > +           break;
> > +   default:
> > +           rw_exit_write(&so->so_lock);
> > +           break;
> > +   }
> > +}
> > +
> > +void
> > soassertlocked(struct socket *so)
> > {
> >     switch (so->so_proto->pr_domain->dom_family) {
> > @@ -425,7 +461,15 @@ sosleep_nsec(struct socket *so, void *id
> >     switch (so->so_proto->pr_domain->dom_family) {
> >     case PF_INET:
> >     case PF_INET6:
> > +           if (so->so_proto->pr_unlock != NULL &&
> > +               rw_status(&netlock) == RW_READ) {
> > +                   (*so->so_proto->pr_unlock)(so);
> > +           }
> >             ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
> > +           if (so->so_proto->pr_lock != NULL &&
> > +               rw_status(&netlock) == RW_READ) {
> > +                   (*so->so_proto->pr_lock)(so);
> > +           }
> >             break;
> >     default:
> >             ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
> > Index: netinet/in_pcb.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> > retrieving revision 1.268
> > diff -u -p -r1.268 in_pcb.c
> > --- netinet/in_pcb.c        28 Jun 2022 09:32:27 -0000      1.268
> > +++ netinet/in_pcb.c        4 Aug 2022 21:26:33 -0000
> > @@ -236,6 +236,7 @@ in_pcballoc(struct socket *so, struct in
> >     inp->inp_table = table;
> >     inp->inp_socket = so;
> >     refcnt_init_trace(&inp->inp_refcnt, DT_REFCNT_IDX_INPCB);
> > +   mtx_init(&inp->inp_mtx, IPL_SOFTNET);
> >     inp->inp_seclevel[SL_AUTH] = IPSEC_AUTH_LEVEL_DEFAULT;
> >     inp->inp_seclevel[SL_ESP_TRANS] = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
> >     inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
> > Index: netinet/in_pcb.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> > retrieving revision 1.129
> > diff -u -p -r1.129 in_pcb.h
> > --- netinet/in_pcb.h        15 May 2022 09:12:20 -0000      1.129
> > +++ netinet/in_pcb.h        4 Aug 2022 21:26:33 -0000
> > @@ -79,6 +79,7 @@
> >  *  I       immutable after creation
> >  *  N       net lock
> >  *  t       inpt_mtx                pcb table mutex
> > + * p       inpcb_mtx               pcb mutex
> >  */
> > 
> > struct pf_state_key;
> > @@ -121,6 +122,7 @@ struct inpcb {
> > #define     inp_route       inp_ru.ru_route
> > #define     inp_route6      inp_ru.ru_route6
> >     struct    refcnt inp_refcnt;    /* refcount PCB, delay memory free */
> > +   struct    mutex inp_mtx;        /* protect PCB and socket members */
> >     int       inp_flags;            /* generic IP/datagram flags */
> >     union {                         /* Header prototype. */
> >             struct ip hu_ip;
> > Index: netinet/in_proto.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v
> > retrieving revision 1.98
> > diff -u -p -r1.98 in_proto.c
> > --- netinet/in_proto.c      25 Feb 2022 23:51:03 -0000      1.98
> > +++ netinet/in_proto.c      4 Aug 2022 21:44:07 -0000
> > @@ -387,6 +387,8 @@ const struct protosw inetsw[] = {
> >   .pr_usrreq        = divert_usrreq,
> >   .pr_attach        = divert_attach,
> >   .pr_detach        = divert_detach,
> > +  .pr_lock = divert_lock,
> > +  .pr_unlock       = divert_unlock,
> >   .pr_init  = divert_init,
> >   .pr_sysctl        = divert_sysctl
> > },
> > Index: netinet/ip_divert.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.c,v
> > retrieving revision 1.68
> > diff -u -p -r1.68 ip_divert.c
> > --- netinet/ip_divert.c     9 May 2022 19:33:46 -0000       1.68
> > +++ netinet/ip_divert.c     4 Aug 2022 21:31:30 -0000
> > @@ -221,22 +221,15 @@ divert_packet(struct mbuf *m, int dir, u
> >             if_put(ifp);
> >     }
> > 
> > +   mtx_enter(&inp->inp_mtx);
> >     so = inp->inp_socket;
> > -   /*
> > -    * XXXSMP sbappendaddr() is not MP safe and this function is called
> > -    * from pf with shared netlock.  To call only one sbappendaddr() from
> > -    * divert_packet(), protect it with kernel lock.  All other places
> > -    * call sbappendaddr() with exclusive net lock.  This blocks
> > -    * divert_packet() as we have the shared lock.
> > -    */
> > -   KERNEL_LOCK();
> >     if (sbappendaddr(so, &so->so_rcv, sintosa(&sin), m, NULL) == 0) {
> > -           KERNEL_UNLOCK();
> > +           mtx_leave(&inp->inp_mtx);
> >             divstat_inc(divs_fullsock);
> >             goto bad;
> >     }
> > -   sorwakeup(inp->inp_socket);
> > -   KERNEL_UNLOCK();
> > +   mtx_leave(&inp->inp_mtx);
> > +   sorwakeup(so);
> > 
> >     in_pcbunref(inp);
> >     return;
> > @@ -356,6 +349,24 @@ divert_detach(struct socket *so)
> > 
> >     in_pcbdetach(inp);
> >     return (0);
> > +}
> > +
> > +void
> > +divert_lock(struct socket *so)
> > +{
> > +   struct inpcb *inp = sotoinpcb(so);
> > +
> > +   NET_ASSERT_LOCKED();
> > +   mtx_enter(&inp->inp_mtx);
> > +}
> > +
> > +void
> > +divert_unlock(struct socket *so)
> > +{
> > +   struct inpcb *inp = sotoinpcb(so);
> > +
> > +   NET_ASSERT_LOCKED();
> > +   mtx_leave(&inp->inp_mtx);
> > }
> > 
> > int
> > Index: netinet/ip_divert.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_divert.h,v
> > retrieving revision 1.15
> > diff -u -p -r1.15 ip_divert.h
> > --- netinet/ip_divert.h     5 May 2022 16:44:22 -0000       1.15
> > +++ netinet/ip_divert.h     4 Aug 2022 21:43:44 -0000
> > @@ -72,5 +72,7 @@ int        divert_usrreq(struct socket *,
> >         int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *);
> > int  divert_attach(struct socket *, int);
> > int  divert_detach(struct socket *);
> > +void        divert_lock(struct socket *);
> > +void        divert_unlock(struct socket *);
> > #endif /* _KERNEL */
> > #endif /* _IP_DIVERT_H_ */
> > Index: sys/protosw.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v
> > retrieving revision 1.35
> > diff -u -p -r1.35 protosw.h
> > --- sys/protosw.h   25 Feb 2022 23:51:04 -0000      1.35
> > +++ sys/protosw.h   4 Aug 2022 21:41:19 -0000
> > @@ -83,6 +83,8 @@ struct protosw {
> > 
> >     int     (*pr_attach)(struct socket *, int);
> >     int     (*pr_detach)(struct socket *);
> > +   void    (*pr_lock)(struct socket *);
> > +   void    (*pr_unlock)(struct socket *);
> > 
> > /* utility hooks */
> >     void    (*pr_init)(void);       /* initialization hook */
> > Index: sys/socketvar.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
> > retrieving revision 1.106
> > diff -u -p -r1.106 socketvar.h
> > --- sys/socketvar.h 15 Jul 2022 17:20:24 -0000      1.106
> > +++ sys/socketvar.h 4 Aug 2022 21:26:33 -0000
> > @@ -346,9 +346,11 @@ int    sockargs(struct mbuf **, const void 
> > 
> > int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
> > void        solock(struct socket *);
> > +void       solock_shared(struct socket *);
> > int solock_persocket(struct socket *);
> > void        solock_pair(struct socket *, struct socket *);
> > void        sounlock(struct socket *);
> > +void       sounlock_shared(struct socket *);
> > 
> > int sendit(struct proc *, int, struct msghdr *, int, register_t *);
> > int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);
> > Index: sys/systm.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/systm.h,v
> > retrieving revision 1.157
> > diff -u -p -r1.157 systm.h
> > --- sys/systm.h     12 Jul 2022 17:12:31 -0000      1.157
> > +++ sys/systm.h     4 Aug 2022 21:31:39 -0000
> > @@ -341,6 +341,15 @@ extern struct rwlock netlock;
> > #define     NET_RLOCK_IN_IOCTL()    do { rw_enter_read(&netlock); } while 
> > (0)
> > #define     NET_RUNLOCK_IN_IOCTL()  do { rw_exit_read(&netlock); } while (0)
> > 
> > +/*
> > + * Reader version of NET_LOCK() to be used in send and receive syscall.
> > + *
> > + * Can be grabbed instead of the exclusive version when no field
> > + * protected by the NET_LOCK() is modified by the ioctl/sysctl.
> > + */
> > +#define    NET_RLOCK_IN_SYSCALL()  do { rw_enter_read(&netlock); } while 
> > (0)
> > +#define    NET_RUNLOCK_IN_SYSCALL() do { rw_exit_read(&netlock); } while 
> > (0)
> > +
> > #ifdef DIAGNOSTIC
> > 
> > #define     NET_ASSERT_UNLOCKED()                                           
> > \
> 

Reply via email to