On Mon, Feb 25, 2019 at 10:37:40AM +1000, David Gwynne wrote:
> 
> 
> > On 22 Feb 2019, at 05:01, Martin Pieuchot <m...@openbsd.org> wrote:
> > 
> > On 21/02/19(Thu) 07:35, David Gwynne wrote:
> >>> On 20 Feb 2019, at 11:21 pm, Martin Pieuchot <m...@openbsd.org> wrote:
> >>> 
> >>> On 20/02/19(Wed) 14:44, David Gwynne wrote:
> >>>> Index: sys/net/if.c
> >>>> ===================================================================
> >>>> RCS file: /cvs/src/sys/net/if.c,v
> >>>> retrieving revision 1.571
> >>>> diff -u -p -r1.571 if.c
> >>>> --- sys/net/if.c 9 Jan 2019 01:14:21 -0000       1.571
> >>>> +++ sys/net/if.c 20 Feb 2019 04:35:42 -0000
> >>>> @@ -2143,6 +2143,25 @@ ifioctl(struct socket *so, u_long cmd, c
> >>>>          NET_UNLOCK();
> >>>>          break;
> >>>> 
> >>>> +        case SIOCSETMPWCFG:
> >>>> +        case SIOCSPWE3CTRLWORD:
> >>>> +        case SIOCSPWE3FAT:
> >>>> +        case SIOCSPWE3NEIGHBOR:
> >>>> +        case SIOCDPWE3NEIGHBOR:
> >>>> +                if ((error = suser(p)) != 0)
> >>>> +                        break;
> >>>> +                /* FALLTHROUGH */
> >>>> +        case SIOCGETMPWCFG:
> >>>> +        case SIOCGPWE3CTRLWORD:
> >>>> +        case SIOCGPWE3FAT:
> >>>> +        case SIOCGPWE3NEIGHBOR:
> >>>> +                if_ref(ifp);
> >>>> +                KERNEL_UNLOCK();
> >>>> +                error = ((*ifp->if_ioctl)(ifp, cmd, data));
> >>>> +                KERNEL_LOCK();
> >>>> +                if_put(ifp);
> >>> 
> >>> Why are you referencing the `ifp' and grabbing the KERNEL_LOCK()
> >>> (recursively)?
> >> 
> >> ifioctl gets the ifp pointer from ifunit, which doesn't increase the ref 
> >> count for you. I'm giving up kernel lock around the pwe3 ioctl calls into 
> >> the driver, not taking them harder. Taking the ifp ref there guarantees 
> >> the interface will stay alive^Wallocated over those calls.
> > 
> > It feels premature to me, well I'm confused.  None of the other ioctl
> > handlers do that.  The KERNEL_LOCK() is still held in ifioctl() which
> > guarantees serialization.  If any of the ioctl(2) calls is going to sleep,
> > thus releasing the lock, then I'd suggest to grab the NET_RLOCK() here
> > instead.  It still guarantees serialization of network ioctls w/ regard
> > to detach.
> > 
> > Note that I'll be delighted if you want to remove/push down the NET_LOCK()
> > from this code path, but can we keep the handlers coherent?
> > 
> > Even if we're using refcounting, don't we want to serialize all network
> > ioctl(2)s?  If we're not using the NET_LOCK() for this, can this new lock
> > guarantee that that `ifp' isn't going away?  Or do you have a better
> > idea?
> 
> The network stack implicitly taking locks is hurting me way more
> than it's helping me at the moment, particularly the net lock, so
> I would like to spend some time narrowing that down. If the consensus
> is that it's too much risk for drivers to keep themselves consistent
> then I'd argue for a per ifp rwlock that the ifioctl code could
> take and release.
> 
> Do you want me to do that for the pwe3 ioctls? There's a small
> number of MPLS interfaces, so they'd be good for a test run.
> 
> ifunit() is notionally like if_get except it doesn't give you a
> reference. You have to be holding a lock that prevents the interface
> being removed from the list if you're calling ifunit. The code
> doesn't make it clear whether the lock you need to be holding is
> the kernel lock or the net lock, but the kernel lock is empirically
> good enough. If you give up that lock while holding the ifp, you
> need to account for your reference to it.

deraadt@ talked me down from giving up KERNEL_LOCK. so this is what
the diff would be like if the interface had a lock and it was taken
around the mpls ioctls.

my opinion on the pros and cons of this is:

pro: it keeps the individual driver state consistent cos changes
are serialised by the lock. this means you don't have to think too
hard about the driver locking against itself.

pro: it allows fear free use of ifq_barrier. ifq_barrier cannot deadlock
if the caller isn't holding NET_LOCK. this is the big win because it
supports the model where the ioctl can coordinate with the running stack
by publishing a change and then inserting a barrier to ensure the old
state is no longer in use. without this the ioctl will have to give
up the implicit NET_LOCK it has.

con: only the mpls/pwe3 ioctls are covered. but i have to start
somewhere, right?

Index: if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.571
diff -u -p -r1.571 if.c
--- if.c        9 Jan 2019 01:14:21 -0000       1.571
+++ if.c        25 Feb 2019 04:46:43 -0000
@@ -594,6 +594,8 @@ if_attach_common(struct ifnet *ifp)
 {
        KASSERT(ifp->if_ioctl != NULL);
 
+       rw_init(&ifp->if_lock, ifp->if_xname);
+
        TAILQ_INIT(&ifp->if_addrlist);
        TAILQ_INIT(&ifp->if_maddrlist);
 
@@ -2141,6 +2143,27 @@ ifioctl(struct socket *so, u_long cmd, c
                NET_LOCK();
                ifp->if_llprio = ifr->ifr_llprio;
                NET_UNLOCK();
+               break;
+
+       case SIOCSETMPWCFG:
+       case SIOCSETLABEL:
+       case SIOCSPWE3CTRLWORD:
+       case SIOCSPWE3FAT:
+       case SIOCSPWE3NEIGHBOR:
+       case SIOCDPWE3NEIGHBOR:
+               if ((error = suser(p)) != 0)
+                       break;
+               /* FALLTHROUGH */
+       case SIOCGETMPWCFG:
+       case SIOCGETLABEL:
+       case SIOCGPWE3CTRLWORD:
+       case SIOCGPWE3FAT:
+       case SIOCGPWE3NEIGHBOR:
+               error = rw_enter(&ifp->if_lock, RW_WRITE | RW_INTR);
+               if (error != 0)
+                       break;
+               error = ((*ifp->if_ioctl)(ifp, cmd, data));
+               rw_exit(&ifp->if_lock);
                break;
 
        case SIOCSETKALIVE:
Index: if_mpe.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpe.c,v
retrieving revision 1.85
diff -u -p -r1.85 if_mpe.c
--- if_mpe.c    20 Feb 2019 00:20:19 -0000      1.85
+++ if_mpe.c    25 Feb 2019 04:46:43 -0000
@@ -59,7 +59,6 @@ struct mpe_softc {
        struct ifaddr           sc_ifa;
        struct sockaddr_mpls    sc_smpls;
 
-       struct rwlock           sc_lock;
        int                     sc_dead;
 };
 
@@ -114,7 +113,6 @@ mpe_clone_create(struct if_clone *ifc, i
        ifp->if_hdrlen = MPE_HDRLEN;
        IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
 
-       rw_init(&sc->sc_lock, ifp->if_xname);
        sc->sc_dead = 0;
 
        if_attach(ifp);
@@ -137,15 +135,22 @@ mpe_clone_destroy(struct ifnet *ifp)
 {
        struct mpe_softc        *sc = ifp->if_softc;
 
-       rw_enter_write(&sc->sc_lock);
+       NET_LOCK();
+       CLR(ifp->if_flags, IFF_RUNNING);
+       NET_UNLOCK();
+
+       rw_enter_write(&ifp->if_lock);
        sc->sc_dead = 1;
 
        if (sc->sc_smpls.smpls_label) {
+               NET_LOCK();
                rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
                    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
+               NET_UNLOCK();
        }
+       rw_exit_write(&ifp->if_lock);
 
-       rw_exit_write(&sc->sc_lock);
+       ifq_barrier(&ifp->if_snd);
 
        if_detach(ifp);
        free(sc, M_DEVBUF, sizeof *sc);
@@ -293,22 +298,25 @@ mpe_set_label(struct mpe_softc *sc, uint
 {
        int error;
 
-       rw_assert_wrlock(&sc->sc_lock);
        if (sc->sc_dead)
                return (ENXIO);
 
        if (sc->sc_smpls.smpls_label) {
                /* remove old MPLS route */
+               NET_LOCK();
                rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
                    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
+               NET_UNLOCK();
        }
 
        /* add new MPLS route */
        sc->sc_smpls.smpls_label = label;
        sc->sc_rdomain = rdomain;
 
+       NET_LOCK();
        error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
            smplstosa(&sc->sc_smpls), sc->sc_rdomain);
+       NET_UNLOCK();
        if (error)
                sc->sc_smpls.smpls_label = 0;
 
@@ -345,7 +353,8 @@ mpe_ioctl(struct ifnet *ifp, u_long cmd,
                error = copyout(&shim, ifr->ifr_data, sizeof(shim));
                break;
        case SIOCSETLABEL:
-               if ((error = copyin(ifr->ifr_data, &shim, sizeof(shim))))
+               error = copyin(ifr->ifr_data, &shim, sizeof(shim));
+               if (error != 0)
                        break;
                if (shim.shim_label > MPLS_LABEL_MAX ||
                    shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
@@ -353,12 +362,10 @@ mpe_ioctl(struct ifnet *ifp, u_long cmd,
                        break;
                }
                shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
-               rw_enter_write(&sc->sc_lock);
                if (sc->sc_smpls.smpls_label != shim.shim_label) {
                        error = mpe_set_label(sc, shim.shim_label,
                            sc->sc_rdomain);
                }
-               rw_exit_write(&sc->sc_lock);
                break;
        case SIOCSLIFPHYRTABLE:
                if (ifr->ifr_rdomainid < 0 ||
@@ -368,12 +375,12 @@ mpe_ioctl(struct ifnet *ifp, u_long cmd,
                        error = EINVAL;
                        break;
                }
-               rw_enter_write(&sc->sc_lock);
+               rw_enter_write(&ifp->if_lock);
                if (sc->sc_rdomain != ifr->ifr_rdomainid) {
                        error = mpe_set_label(sc, sc->sc_smpls.smpls_label,
                            ifr->ifr_rdomainid);
                }
-               rw_exit_write(&sc->sc_lock);
+               rw_exit_write(&ifp->if_lock);
                break;
        case SIOCGLIFPHYRTABLE:
                ifr->ifr_rdomainid = sc->sc_rdomain;
Index: if_mpw.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpw.c,v
retrieving revision 1.44
diff -u -p -r1.44 if_mpw.c
--- if_mpw.c    20 Feb 2019 01:04:53 -0000      1.44
+++ if_mpw.c    25 Feb 2019 04:46:43 -0000
@@ -44,6 +44,11 @@
 #include <net/if_vlan_var.h>
 #endif
 
+struct mpw_neighbor {
+       struct shim_hdr         n_rshim;
+       struct sockaddr_storage n_nexthop;
+};
+
 struct mpw_softc {
        struct arpcom           sc_ac;
 #define sc_if                  sc_ac.ac_if
@@ -56,10 +61,8 @@ struct mpw_softc {
        unsigned int            sc_fword;
        uint32_t                sc_flow;
        uint32_t                sc_type;
-       struct shim_hdr         sc_rshim;
-       struct sockaddr_storage sc_nexthop;
+       struct mpw_neighbor     *sc_neighbor;
 
-       struct rwlock           sc_lock;
        unsigned int            sc_dead;
 };
 
@@ -94,6 +97,7 @@ mpw_clone_create(struct if_clone *ifc, i
                return (ENOMEM);
 
        sc->sc_flow = arc4random();
+       sc->sc_neighbor = NULL;
 
        ifp = &sc->sc_if;
        snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
@@ -108,7 +112,6 @@ mpw_clone_create(struct if_clone *ifc, i
        IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
        ether_fakeaddr(ifp);
 
-       rw_init(&sc->sc_lock, ifp->if_xname);
        sc->sc_dead = 0;
 
        if_attach(ifp);
@@ -128,60 +131,277 @@ mpw_clone_destroy(struct ifnet *ifp)
 {
        struct mpw_softc *sc = ifp->if_softc;
 
+       NET_LOCK();
        ifp->if_flags &= ~IFF_RUNNING;
+       NET_UNLOCK();
 
-       rw_enter_write(&sc->sc_lock);
+       rw_enter_write(&ifp->if_lock);
        sc->sc_dead = 1;
 
        if (sc->sc_smpls.smpls_label) {
+               NET_LOCK();
                rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
                    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
+               NET_UNLOCK();
        }
-       rw_exit_write(&sc->sc_lock);
+       rw_exit(&ifp->if_lock);
+
+       ifq_barrier(&ifp->if_snd);
 
        ether_ifdetach(ifp);
        if_detach(ifp);
 
+       free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor));
        free(sc, M_DEVBUF, sizeof(*sc));
 
        return (0);
 }
 
 int
-mpw_set_label(struct mpw_softc *sc, uint32_t label, unsigned int rdomain)
+mpw_set_route(struct mpw_softc *sc, uint32_t label, unsigned int rdomain)
 {
        int error;
 
-       rw_assert_wrlock(&sc->sc_lock);
        if (sc->sc_dead)
                return (ENXIO);
 
        if (sc->sc_smpls.smpls_label) {
+               NET_LOCK();
                rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
                    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
+               NET_UNLOCK();
        }
 
        sc->sc_smpls.smpls_label = label;
        sc->sc_rdomain = rdomain;
 
+       NET_LOCK();
        error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
            smplstosa(&sc->sc_smpls), sc->sc_rdomain);
+       NET_UNLOCK();
        if (error != 0)
                sc->sc_smpls.smpls_label = 0;
 
        return (error);
 }
 
+static int
+mpw_set_neighbor(struct mpw_softc *sc, const struct if_laddrreq *req)
+{
+       struct mpw_neighbor *n, *o;
+       const struct sockaddr_storage *ss;
+       const struct sockaddr_mpls *smpls;
+       uint32_t label;
+
+       smpls = (const struct sockaddr_mpls *)&req->dstaddr;
+
+       if (smpls->smpls_family != AF_MPLS)
+               return (EINVAL);
+       label = smpls->smpls_label;
+       if (label > MPLS_LABEL_MAX || label <= MPLS_LABEL_RESERVED_MAX)
+               return (EINVAL);
+
+       ss = &req->addr;
+       switch (ss->ss_family) {
+       case AF_INET: {
+               const struct sockaddr_in *sin =
+                   (const struct sockaddr_in *)ss;
+
+               if (in_nullhost(sin->sin_addr) ||
+                   IN_MULTICAST(sin->sin_addr.s_addr))
+                       return (EINVAL);
+
+               break;
+       }
+#ifdef INET6
+       case AF_INET6: {
+               const struct sockaddr_in6 *sin6 =
+                   (const struct sockaddr_in6 *)ss;
+
+               if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
+                   IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+                       return (EINVAL);
+
+               /* check scope */
+
+               break;
+       }
+#endif
+       default:
+               return (EAFNOSUPPORT);
+       }
+
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       n = malloc(sizeof(*n), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+       if (n == NULL)
+               return (ENOMEM);
+
+       n->n_rshim.shim_label = MPLS_LABEL2SHIM(label);
+       n->n_nexthop = *ss;
+
+       o = sc->sc_neighbor;
+       sc->sc_neighbor = n;
+
+       NET_ASSERT_UNLOCKED();
+       ifq_barrier(&sc->sc_if.if_snd);
+
+       free(o, M_DEVBUF, sizeof(*o));
+
+       return (0);
+}
+
+static int
+mpw_get_neighbor(struct mpw_softc *sc, struct if_laddrreq *req)
+{
+       struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req->dstaddr;
+       struct mpw_neighbor *n = sc->sc_neighbor;
+
+       if (n == NULL)
+               return (EADDRNOTAVAIL);
+
+       smpls->smpls_len = sizeof(*smpls);
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_label = MPLS_SHIM2LABEL(n->n_rshim.shim_label);
+
+       req->addr = n->n_nexthop;
+
+       return (0);
+}
+
+static int
+mpw_del_neighbor(struct mpw_softc *sc)
+{
+       struct mpw_neighbor *o;
+
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       o = sc->sc_neighbor;
+       sc->sc_neighbor = NULL;
+
+       ifq_barrier(&sc->sc_if.if_snd);
+
+       free(o, M_DEVBUF, sizeof(*o));
+
+       return (0);
+}
+
+static int
+mpw_set_label(struct mpw_softc *sc, const struct shim_hdr *label)
+{
+       uint32_t shim;
+
+       if (label->shim_label > MPLS_LABEL_MAX ||
+           label->shim_label <= MPLS_LABEL_RESERVED_MAX)
+               return (EINVAL);
+
+       shim = MPLS_LABEL2SHIM(label->shim_label);
+       if (sc->sc_smpls.smpls_label == shim)
+               return (0);
+
+       return (mpw_set_route(sc, shim, sc->sc_rdomain));
+}
+
+static int
+mpw_get_label(struct mpw_softc *sc, struct ifreq *ifr)
+{
+       struct shim_hdr label;
+
+       label.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
+       if (label.shim_label == MPLS_LABEL2SHIM(0))
+               return (EADDRNOTAVAIL);
+
+       return (copyout(&label, ifr->ifr_data, sizeof(label)));
+}
+
+static int
+mpw_del_label(struct mpw_softc *sc)
+{
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
+               NET_LOCK();
+               rt_ifa_del(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                   smplstosa(&sc->sc_smpls), 0);
+               NET_UNLOCK();
+       }
+
+       sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
+
+       return (0);
+}
+
+static int
+mpw_set_config(struct mpw_softc *sc, const struct ifreq *ifr)
+{
+       struct ifmpwreq imr;
+       struct if_laddrreq req;
+       struct sockaddr_mpls *smpls;
+       struct sockaddr_in *sin;
+       int error;
+
+       error = copyin(ifr->ifr_data, &imr, sizeof(imr));
+       if (error != 0)
+               return (error);
+
+       /* Teardown all configuration if got no nexthop */
+       sin = (struct sockaddr_in *)&imr.imr_nexthop;
+       if (sin->sin_addr.s_addr == 0) {
+               mpw_del_label(sc);
+               mpw_del_neighbor(sc);
+               sc->sc_cword = 0;
+               sc->sc_type = 0;
+               return (0);
+       }
+
+       error = mpw_set_label(sc, &imr.imr_lshim);
+       if (error != 0)
+               return (error);
+
+       smpls = (struct sockaddr_mpls *)&req.dstaddr;
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_label = imr.imr_rshim.shim_label;
+       req.addr = imr.imr_nexthop;
+
+       error = mpw_set_neighbor(sc, &req);
+       if (error != 0)
+               return (error);
+
+       sc->sc_cword = ISSET(imr.imr_flags, IMR_FLAG_CONTROLWORD);
+       sc->sc_type = imr.imr_type;
+
+       return (0);
+}
+
+static int
+mpw_get_config(struct mpw_softc *sc, const struct ifreq *ifr)
+{
+       struct ifmpwreq imr;
+
+       memset(&imr, 0, sizeof(imr));
+       imr.imr_flags = sc->sc_cword ? IMR_FLAG_CONTROLWORD : 0;
+       imr.imr_type = sc->sc_type;
+
+       imr.imr_lshim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
+       if (sc->sc_neighbor) {
+               imr.imr_rshim.shim_label =
+                   MPLS_SHIM2LABEL(sc->sc_neighbor->n_rshim.shim_label);
+               imr.imr_nexthop = sc->sc_neighbor->n_nexthop;
+       }
+
+       return (copyout(&imr, ifr->ifr_data, sizeof(imr)));
+}
+
 int
 mpw_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
        struct ifreq *ifr = (struct ifreq *) data;
        struct mpw_softc *sc = ifp->if_softc;
        struct shim_hdr shim;
-       struct sockaddr_in *sin;
-       struct sockaddr_in *sin_nexthop;
        int error = 0;
-       struct ifmpwreq imr;
 
        switch (cmd) {
        case SIOCSIFFLAGS:
@@ -194,99 +414,45 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd,
        case SIOCGPWE3:
                ifr->ifr_pwe3 = IF_PWE3_ETHERNET;
                break;
+       case SIOCSPWE3CTRLWORD:
+               sc->sc_cword = ifr->ifr_pwe3 ? 1 : 0;
+               break;
+       case SIOCGPWE3CTRLWORD:
+               ifr->ifr_pwe3 = sc->sc_cword;
+               break;
+       case SIOCSPWE3FAT:
+               sc->sc_fword = ifr->ifr_pwe3 ? 1 : 0;
+               break;
+       case SIOCGPWE3FAT:
+               ifr->ifr_pwe3 = sc->sc_fword;
+               break;
+
+       case SIOCSPWE3NEIGHBOR:
+               error = mpw_set_neighbor(sc, (struct if_laddrreq *)data);
+               break;
+       case SIOCGPWE3NEIGHBOR:
+               error = mpw_get_neighbor(sc, (struct if_laddrreq *)data);
+               break;
+       case SIOCDPWE3NEIGHBOR:
+               error = mpw_del_neighbor(sc);
+               break;
 
        case SIOCGETLABEL:
-               shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
-               error = copyout(&shim, ifr->ifr_data, sizeof(shim));
+               error = mpw_get_label(sc, ifr);
                break;
        case SIOCSETLABEL:
-               if ((error = copyin(ifr->ifr_data, &shim, sizeof(shim))))
-                       break;
-               if (shim.shim_label > MPLS_LABEL_MAX ||
-                   shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
-                       error = EINVAL;
+               error = copyin(ifr->ifr_data, &shim, sizeof(shim));
+               if (error != 0)
                        break;
-               }
-               shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
-               rw_enter_write(&sc->sc_lock);
-               if (sc->sc_smpls.smpls_label != shim.shim_label) {
-                       error = mpw_set_label(sc, shim.shim_label,
-                           sc->sc_rdomain);
-               }
-               rw_exit_write(&sc->sc_lock);
+               error = mpw_set_label(sc, &shim);
                break;
 
        case SIOCSETMPWCFG:
-               error = suser(curproc);
-               if (error != 0)
-                       break;
-
-               error = copyin(ifr->ifr_data, &imr, sizeof(imr));
-               if (error != 0)
-                       break;
-
-               /* Teardown all configuration if got no nexthop */
-               sin = (struct sockaddr_in *) &imr.imr_nexthop;
-               if (sin->sin_addr.s_addr == 0) {
-                       if (rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
-                           smplstosa(&sc->sc_smpls), 0) == 0)
-                               sc->sc_smpls.smpls_label = 0;
-
-                       memset(&sc->sc_rshim, 0, sizeof(sc->sc_rshim));
-                       memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop));
-                       sc->sc_cword = 0;
-                       sc->sc_type = 0;
-                       break;
-               }
-
-               /* Validate input */
-               if (sin->sin_family != AF_INET ||
-                   imr.imr_lshim.shim_label > MPLS_LABEL_MAX ||
-                   imr.imr_lshim.shim_label <= MPLS_LABEL_RESERVED_MAX ||
-                   imr.imr_rshim.shim_label > MPLS_LABEL_MAX ||
-                   imr.imr_rshim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
-                       error = EINVAL;
-                       break;
-               }
-
-               /* Setup labels and create inbound route */
-               imr.imr_lshim.shim_label =
-                   MPLS_LABEL2SHIM(imr.imr_lshim.shim_label);
-               imr.imr_rshim.shim_label =
-                   MPLS_LABEL2SHIM(imr.imr_rshim.shim_label);
-
-               rw_enter_write(&sc->sc_lock);
-               if (sc->sc_smpls.smpls_label != imr.imr_lshim.shim_label) {
-                       error = mpw_set_label(sc, imr.imr_lshim.shim_label,
-                           sc->sc_rdomain);
-               }
-               rw_exit_write(&sc->sc_lock);
-               if (error != 0)
-                       break;
-
-               /* Apply configuration */
-               sc->sc_cword = ISSET(imr.imr_flags, IMR_FLAG_CONTROLWORD);
-               sc->sc_type = imr.imr_type;
-               sc->sc_rshim.shim_label = imr.imr_rshim.shim_label;
-
-               memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop));
-               sin_nexthop = (struct sockaddr_in *) &sc->sc_nexthop;
-               sin_nexthop->sin_family = sin->sin_family;
-               sin_nexthop->sin_len = sizeof(struct sockaddr_in);
-               sin_nexthop->sin_addr.s_addr = sin->sin_addr.s_addr;
+               error = mpw_set_config(sc, ifr);
                break;
 
        case SIOCGETMPWCFG:
-               imr.imr_flags = sc->sc_cword ? IMR_FLAG_CONTROLWORD : 0;
-               imr.imr_type = sc->sc_type;
-               imr.imr_lshim.shim_label =
-                   MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
-               imr.imr_rshim.shim_label =
-                   MPLS_SHIM2LABEL(sc->sc_rshim.shim_label);
-               memcpy(&imr.imr_nexthop, &sc->sc_nexthop,
-                   sizeof(imr.imr_nexthop));
-
-               error = copyout(&imr, ifr->ifr_data, sizeof(imr));
+               error = mpw_get_config(sc, ifr);
                break;
 
        case SIOCSLIFPHYRTABLE:
@@ -297,18 +463,17 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd,
                        error = EINVAL;
                        break;
                }
-               rw_enter_write(&sc->sc_lock);
+               rw_enter_write(&ifp->if_lock);
                if (sc->sc_rdomain != ifr->ifr_rdomainid) {
-                       error = mpw_set_label(sc, sc->sc_smpls.smpls_label,
+                       error = mpw_set_route(sc, sc->sc_smpls.smpls_label,
                            ifr->ifr_rdomainid);
                }
-               rw_exit_write(&sc->sc_lock);
+               rw_exit_write(&ifp->if_lock);
                break;
        case SIOCGLIFPHYRTABLE:
                ifr->ifr_rdomainid = sc->sc_rdomain;
                break;
 
-
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                break;
@@ -440,20 +605,22 @@ mpw_start(struct ifnet *ifp)
        struct ifnet *ifp0;
        struct mbuf *m, *m0;
        struct shim_hdr *shim;
+       struct mpw_neighbor *n;
        struct sockaddr_mpls smpls = {
                .smpls_len = sizeof(smpls),
                .smpls_family = AF_MPLS,
        };
        uint32_t bos;
 
+       n = sc->sc_neighbor;
        if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
-           sc->sc_rshim.shim_label == 0 ||
-           sc->sc_type == IMR_TYPE_NONE) {
+           sc->sc_type == IMR_TYPE_NONE ||
+           n == NULL) {
                IFQ_PURGE(&ifp->if_snd);
                return;
        }
 
-       rt = rtalloc(sstosa(&sc->sc_nexthop), RT_RESOLVE, sc->sc_rdomain);
+       rt = rtalloc(sstosa(&n->n_nexthop), RT_RESOLVE, sc->sc_rdomain);
        if (!rtisvalid(rt)) {
                IFQ_PURGE(&ifp->if_snd);
                goto rtfree;
@@ -515,7 +682,7 @@ mpw_start(struct ifnet *ifp)
 
                shim = mtod(m0, struct shim_hdr *);
                shim->shim_label = htonl(mpls_defttl) & MPLS_TTL_MASK;
-               shim->shim_label |= sc->sc_rshim.shim_label | bos;
+               shim->shim_label |= n->n_rshim.shim_label | bos;
 
                m0->m_pkthdr.ph_rtableid = ifp->if_rdomain;
 
Index: if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.94
diff -u -p -r1.94 if_var.h
--- if_var.h    9 Jan 2019 01:14:21 -0000       1.94
+++ if_var.h    25 Feb 2019 04:46:43 -0000
@@ -41,6 +41,7 @@
 #include <sys/queue.h>
 #include <sys/mbuf.h>
 #include <sys/srp.h>
+#include <sys/rwlock.h>
 #include <sys/refcnt.h>
 #include <sys/task.h>
 #include <sys/time.h>
@@ -117,6 +118,7 @@ TAILQ_HEAD(ifnet_head, ifnet);              /* the a
 struct ifnet {                         /* and the entries */
        void    *if_softc;              /* lower-level data for this if */
        struct  refcnt if_refcnt;
+       struct  rwlock if_lock;
        TAILQ_ENTRY(ifnet) if_list;     /* [k] all struct ifnets are chained */
        TAILQ_HEAD(, ifaddr) if_addrlist; /* [N] list of addresses per if */
        TAILQ_HEAD(, ifmaddr) if_maddrlist; /* [N] list of multicast records */

Reply via email to