On Mon, Feb 25, 2019 at 03:28:58PM +1000, David Gwynne wrote:
> On Mon, Feb 25, 2019 at 10:37:40AM +1000, David Gwynne wrote:
> > 
> > 
> > > On 22 Feb 2019, at 05:01, Martin Pieuchot <m...@openbsd.org> wrote:
> > > 
> > > On 21/02/19(Thu) 07:35, David Gwynne wrote:
> > >>> On 20 Feb 2019, at 11:21 pm, Martin Pieuchot <m...@openbsd.org> wrote:
> > >>> 
> > >>> On 20/02/19(Wed) 14:44, David Gwynne wrote:
> > >>>> Index: sys/net/if.c
> > >>>> ===================================================================
> > >>>> RCS file: /cvs/src/sys/net/if.c,v
> > >>>> retrieving revision 1.571
> > >>>> diff -u -p -r1.571 if.c
> > >>>> --- sys/net/if.c       9 Jan 2019 01:14:21 -0000       1.571
> > >>>> +++ sys/net/if.c       20 Feb 2019 04:35:42 -0000
> > >>>> @@ -2143,6 +2143,25 @@ ifioctl(struct socket *so, u_long cmd, c
> > >>>>                NET_UNLOCK();
> > >>>>                break;
> > >>>> 
> > >>>> +      case SIOCSETMPWCFG:
> > >>>> +      case SIOCSPWE3CTRLWORD:
> > >>>> +      case SIOCSPWE3FAT:
> > >>>> +      case SIOCSPWE3NEIGHBOR:
> > >>>> +      case SIOCDPWE3NEIGHBOR:
> > >>>> +              if ((error = suser(p)) != 0)
> > >>>> +                      break;
> > >>>> +              /* FALLTHROUGH */
> > >>>> +      case SIOCGETMPWCFG:
> > >>>> +      case SIOCGPWE3CTRLWORD:
> > >>>> +      case SIOCGPWE3FAT:
> > >>>> +      case SIOCGPWE3NEIGHBOR:
> > >>>> +              if_ref(ifp);
> > >>>> +              KERNEL_UNLOCK();
> > >>>> +              error = ((*ifp->if_ioctl)(ifp, cmd, data));
> > >>>> +              KERNEL_LOCK();
> > >>>> +              if_put(ifp);
> > >>> 
> > >>> Why are you referencing the `ifp' and grabbing the KERNEL_LOCK()
> > >>> (recursively)?
> > >> 
> > >> ifioctl gets the ifp pointer from ifunit, which doesn't increase the ref 
> > >> count for you. I'm giving up kernel lock around the pwe3 ioctl calls 
> > >> into the driver, not taking them harder. Taking the ifp ref there 
> > >> guarantees the interface will stay alive^Wallocated over those calls.
> > > 
> > > It feels premature to me, well I'm confused.  None of the other ioctl
> > > handlers do that.  The KERNEL_LOCK() is still held in ifioctl() which
> > > guarantees serialization.  If any of the ioctl(2) calls is going to sleep,
> > > thus releasing the lock, then I'd suggest to grab the NET_RLOCK() here
> > > instead.  It still guarantees serialization of network ioctls w/ regard
> > > to detach.
> > > 
> > > Note that I'll be delighted if you want to remove/push down the NET_LOCK()
> > > from this code path, but can we keep the handlers coherent?
> > > 
> > > Even if we're using refcounting, don't we want to serialize all network
> > > ioctl(2)s?  If we're not using the NET_LOCK() for this, can this new lock
> > > guarantee that that `ifp' isn't going away?  Or do you have a better
> > > idea?
> > 
> > The network stack implicitly taking locks is hurting me way more
> > than it's helping me at the moment, particularly the net lock, so
> > I would like to spend some time narrowing that down. If the consensus
> > is that it's too much risk for drivers to keep themselves consistent
> > then I'd argue for a per ifp rwlock that the ifioctl code could
> > take and release.
> > 
> > Do you want me to do that for the pwe3 ioctls? There's a small
> > number of MPLS interfaces, so they'd be good for a test run.
> > 
> > ifunit() is notionally like if_get except it doesn't give you a
> > reference. You have to be holding a lock that prevents the interface
> > being removed from the list if you're calling ifunit. The code
> > doesn't make it clear whether the lock you need to be holding is
> > the kernel lock or the net lock, but the kernel lock is empirically
> > good enough. If you give up that lock while holding the ifp, you
> > need to account for your reference to it.
> 
> deraadt@ talked me down from giving up KERNEL_LOCK. so this is what
> the diff would be like if the interface had a lock and it was taken
> around the mpls ioctls.
> 
> my opinion on the pros and cons of this is:
> 
> pro: it keeps the individual driver state consistent cos changes
> are serialised by the lock. this means you don't have to think too
> hard about the driver locking against itself.
> 
> pro: it allows fear free use of ifq_barrier. ifq_barrier cannot deadlock
> if the caller isn't holding NET_LOCK. this is the big win because it
> supports the model where the ioctl can coordinate with the running stack
> by publishing a change and then inserting a barrier to ensure the old
> state is no longer in use. without this the ioctl will have to give
> up the implicit NET_LOCK it has.
> 
> con: only the mpls/pwe3 ioctls are covered. but i have to start
> somewhere, right?

actually, now i have my head around the clone destroy stuff i can
(ab)use the net lock.

this should be more acceptable to everyone.

Index: sys/sys/sockio.h
===================================================================
RCS file: /cvs/src/sys/sys/sockio.h,v
retrieving revision 1.79
diff -u -p -r1.79 sockio.h
--- sys/sys/sockio.h    23 Jan 2019 08:23:18 -0000      1.79
+++ sys/sys/sockio.h    25 Feb 2019 09:58:15 -0000
@@ -143,6 +143,7 @@
 #define        SIOCSSPPPPARAMS  _IOW('i', 147, struct ifreq)   /* set pppoe 
params */
 #define        SIOCGSPPPPARAMS _IOWR('i', 148, struct ifreq)   /* get pppoe 
params */
 
+#define SIOCDELLABEL    _IOW('i', 151, struct ifreq)   /* del MPLS label */
 #define SIOCGPWE3       _IOWR('i', 152, struct ifreq)  /* get MPLS PWE3 cap */
 #define SIOCSETLABEL    _IOW('i', 153, struct ifreq)   /* set MPLS label */
 #define SIOCGETLABEL    _IOW('i', 154, struct ifreq)   /* get MPLS label */
@@ -204,6 +205,14 @@
 
 #define        SIOCSLIFPHYECN  _IOW('i', 199, struct ifreq)    /* set ecn 
copying */
 #define        SIOCGLIFPHYECN  _IOWR('i', 200, struct ifreq)   /* get ecn 
copying */
+
+#define SIOCSPWE3CTRLWORD      _IOW('i', 220, struct ifreq)
+#define SIOCGPWE3CTRLWORD      _IOWR('i',  220, struct ifreq)
+#define SIOCSPWE3FAT           _IOW('i', 221, struct ifreq)
+#define SIOCGPWE3FAT           _IOWR('i', 221, struct ifreq)
+#define SIOCSPWE3NEIGHBOR      _IOW('i', 222, struct if_laddrreq)
+#define SIOCGPWE3NEIGHBOR      _IOWR('i', 222, struct if_laddrreq)
+#define SIOCDPWE3NEIGHBOR      _IOW('i', 222, struct ifreq)
 
 #define        SIOCSVH         _IOWR('i', 245, struct ifreq)   /* set carp 
param */
 #define        SIOCGVH         _IOWR('i', 246, struct ifreq)   /* get carp 
param */
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.571
diff -u -p -r1.571 if.c
--- sys/net/if.c        9 Jan 2019 01:14:21 -0000       1.571
+++ sys/net/if.c        25 Feb 2019 09:58:15 -0000
@@ -2159,6 +2159,12 @@ ifioctl(struct socket *so, u_long cmd, c
        case SIOCSIFPAIR:
        case SIOCSIFPARENT:
        case SIOCDIFPARENT:
+       case SIOCSETMPWCFG:
+       case SIOCSETLABEL:
+       case SIOCSPWE3CTRLWORD:
+       case SIOCSPWE3FAT:
+       case SIOCSPWE3NEIGHBOR:
+       case SIOCDPWE3NEIGHBOR:
                if ((error = suser(p)) != 0)
                        break;
                /* FALLTHROUGH */
Index: sys/net/if_mpw.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpw.c,v
retrieving revision 1.44
diff -u -p -r1.44 if_mpw.c
--- sys/net/if_mpw.c    20 Feb 2019 01:04:53 -0000      1.44
+++ sys/net/if_mpw.c    25 Feb 2019 09:58:15 -0000
@@ -44,6 +44,11 @@
 #include <net/if_vlan_var.h>
 #endif
 
+struct mpw_neighbor {
+       struct shim_hdr         n_rshim;
+       struct sockaddr_storage n_nexthop;
+};
+
 struct mpw_softc {
        struct arpcom           sc_ac;
 #define sc_if                  sc_ac.ac_if
@@ -56,10 +61,8 @@ struct mpw_softc {
        unsigned int            sc_fword;
        uint32_t                sc_flow;
        uint32_t                sc_type;
-       struct shim_hdr         sc_rshim;
-       struct sockaddr_storage sc_nexthop;
+       struct mpw_neighbor     *sc_neighbor;
 
-       struct rwlock           sc_lock;
        unsigned int            sc_dead;
 };
 
@@ -94,6 +97,7 @@ mpw_clone_create(struct if_clone *ifc, i
                return (ENOMEM);
 
        sc->sc_flow = arc4random();
+       sc->sc_neighbor = NULL;
 
        ifp = &sc->sc_if;
        snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
@@ -108,7 +112,6 @@ mpw_clone_create(struct if_clone *ifc, i
        IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
        ether_fakeaddr(ifp);
 
-       rw_init(&sc->sc_lock, ifp->if_xname);
        sc->sc_dead = 0;
 
        if_attach(ifp);
@@ -128,31 +131,32 @@ mpw_clone_destroy(struct ifnet *ifp)
 {
        struct mpw_softc *sc = ifp->if_softc;
 
+       NET_LOCK();
        ifp->if_flags &= ~IFF_RUNNING;
-
-       rw_enter_write(&sc->sc_lock);
        sc->sc_dead = 1;
 
        if (sc->sc_smpls.smpls_label) {
                rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
                    smplstosa(&sc->sc_smpls), sc->sc_rdomain);
        }
-       rw_exit_write(&sc->sc_lock);
+       NET_UNLOCK();
+
+       ifq_barrier(&ifp->if_snd);
 
        ether_ifdetach(ifp);
        if_detach(ifp);
 
+       free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor));
        free(sc, M_DEVBUF, sizeof(*sc));
 
        return (0);
 }
 
 int
-mpw_set_label(struct mpw_softc *sc, uint32_t label, unsigned int rdomain)
+mpw_set_route(struct mpw_softc *sc, uint32_t label, unsigned int rdomain)
 {
        int error;
 
-       rw_assert_wrlock(&sc->sc_lock);
        if (sc->sc_dead)
                return (ENXIO);
 
@@ -172,16 +176,224 @@ mpw_set_label(struct mpw_softc *sc, uint
        return (error);
 }
 
+static int
+mpw_set_neighbor(struct mpw_softc *sc, const struct if_laddrreq *req)
+{
+       struct mpw_neighbor *n, *o;
+       const struct sockaddr_storage *ss;
+       const struct sockaddr_mpls *smpls;
+       uint32_t label;
+
+       smpls = (const struct sockaddr_mpls *)&req->dstaddr;
+
+       if (smpls->smpls_family != AF_MPLS)
+               return (EINVAL);
+       label = smpls->smpls_label;
+       if (label > MPLS_LABEL_MAX || label <= MPLS_LABEL_RESERVED_MAX)
+               return (EINVAL);
+
+       ss = &req->addr;
+       switch (ss->ss_family) {
+       case AF_INET: {
+               const struct sockaddr_in *sin =
+                   (const struct sockaddr_in *)ss;
+
+               if (in_nullhost(sin->sin_addr) ||
+                   IN_MULTICAST(sin->sin_addr.s_addr))
+                       return (EINVAL);
+
+               break;
+       }
+#ifdef INET6
+       case AF_INET6: {
+               const struct sockaddr_in6 *sin6 =
+                   (const struct sockaddr_in6 *)ss;
+
+               if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
+                   IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+                       return (EINVAL);
+
+               /* check scope */
+
+               break;
+       }
+#endif
+       default:
+               return (EAFNOSUPPORT);
+       }
+
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       n = malloc(sizeof(*n), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+       if (n == NULL)
+               return (ENOMEM);
+
+       n->n_rshim.shim_label = MPLS_LABEL2SHIM(label);
+       n->n_nexthop = *ss;
+
+       o = sc->sc_neighbor;
+       sc->sc_neighbor = n;
+
+       NET_UNLOCK();
+       ifq_barrier(&sc->sc_if.if_snd);
+       NET_LOCK();
+
+       free(o, M_DEVBUF, sizeof(*o));
+
+       return (0);
+}
+
+static int
+mpw_get_neighbor(struct mpw_softc *sc, struct if_laddrreq *req)
+{
+       struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req->dstaddr;
+       struct mpw_neighbor *n = sc->sc_neighbor;
+
+       if (n == NULL)
+               return (EADDRNOTAVAIL);
+
+       smpls->smpls_len = sizeof(*smpls);
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_label = MPLS_SHIM2LABEL(n->n_rshim.shim_label);
+
+       req->addr = n->n_nexthop;
+
+       return (0);
+}
+
+static int
+mpw_del_neighbor(struct mpw_softc *sc)
+{
+       struct mpw_neighbor *o;
+
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       o = sc->sc_neighbor;
+       sc->sc_neighbor = NULL;
+
+       NET_UNLOCK();
+       ifq_barrier(&sc->sc_if.if_snd);
+       NET_LOCK();
+
+       free(o, M_DEVBUF, sizeof(*o));
+
+       return (0);
+}
+
+static int
+mpw_set_label(struct mpw_softc *sc, const struct shim_hdr *label)
+{
+       uint32_t shim;
+
+       if (label->shim_label > MPLS_LABEL_MAX ||
+           label->shim_label <= MPLS_LABEL_RESERVED_MAX)
+               return (EINVAL);
+
+       shim = MPLS_LABEL2SHIM(label->shim_label);
+       if (sc->sc_smpls.smpls_label == shim)
+               return (0);
+
+       return (mpw_set_route(sc, shim, sc->sc_rdomain));
+}
+
+static int
+mpw_get_label(struct mpw_softc *sc, struct ifreq *ifr)
+{
+       struct shim_hdr label;
+
+       label.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
+       if (label.shim_label == MPLS_LABEL2SHIM(0))
+               return (EADDRNOTAVAIL);
+
+       return (copyout(&label, ifr->ifr_data, sizeof(label)));
+}
+
+static int
+mpw_del_label(struct mpw_softc *sc)
+{
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) {
+               rt_ifa_del(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                   smplstosa(&sc->sc_smpls), 0);
+       }
+
+       sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0);
+
+       return (0);
+}
+
+static int
+mpw_set_config(struct mpw_softc *sc, const struct ifreq *ifr)
+{
+       struct ifmpwreq imr;
+       struct if_laddrreq req;
+       struct sockaddr_mpls *smpls;
+       struct sockaddr_in *sin;
+       int error;
+
+       error = copyin(ifr->ifr_data, &imr, sizeof(imr));
+       if (error != 0)
+               return (error);
+
+       /* Teardown all configuration if got no nexthop */
+       sin = (struct sockaddr_in *)&imr.imr_nexthop;
+       if (sin->sin_addr.s_addr == 0) {
+               mpw_del_label(sc);
+               mpw_del_neighbor(sc);
+               sc->sc_cword = 0;
+               sc->sc_type = 0;
+               return (0);
+       }
+
+       error = mpw_set_label(sc, &imr.imr_lshim);
+       if (error != 0)
+               return (error);
+
+       smpls = (struct sockaddr_mpls *)&req.dstaddr;
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_label = imr.imr_rshim.shim_label;
+       req.addr = imr.imr_nexthop;
+
+       error = mpw_set_neighbor(sc, &req);
+       if (error != 0)
+               return (error);
+
+       sc->sc_cword = ISSET(imr.imr_flags, IMR_FLAG_CONTROLWORD);
+       sc->sc_type = imr.imr_type;
+
+       return (0);
+}
+
+static int
+mpw_get_config(struct mpw_softc *sc, const struct ifreq *ifr)
+{
+       struct ifmpwreq imr;
+
+       memset(&imr, 0, sizeof(imr));
+       imr.imr_flags = sc->sc_cword ? IMR_FLAG_CONTROLWORD : 0;
+       imr.imr_type = sc->sc_type;
+
+       imr.imr_lshim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
+       if (sc->sc_neighbor) {
+               imr.imr_rshim.shim_label =
+                   MPLS_SHIM2LABEL(sc->sc_neighbor->n_rshim.shim_label);
+               imr.imr_nexthop = sc->sc_neighbor->n_nexthop;
+       }
+
+       return (copyout(&imr, ifr->ifr_data, sizeof(imr)));
+}
+
 int
 mpw_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
        struct ifreq *ifr = (struct ifreq *) data;
        struct mpw_softc *sc = ifp->if_softc;
        struct shim_hdr shim;
-       struct sockaddr_in *sin;
-       struct sockaddr_in *sin_nexthop;
        int error = 0;
-       struct ifmpwreq imr;
 
        switch (cmd) {
        case SIOCSIFFLAGS:
@@ -194,99 +406,45 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd,
        case SIOCGPWE3:
                ifr->ifr_pwe3 = IF_PWE3_ETHERNET;
                break;
+       case SIOCSPWE3CTRLWORD:
+               sc->sc_cword = ifr->ifr_pwe3 ? 1 : 0;
+               break;
+       case SIOCGPWE3CTRLWORD:
+               ifr->ifr_pwe3 = sc->sc_cword;
+               break;
+       case SIOCSPWE3FAT:
+               sc->sc_fword = ifr->ifr_pwe3 ? 1 : 0;
+               break;
+       case SIOCGPWE3FAT:
+               ifr->ifr_pwe3 = sc->sc_fword;
+               break;
+
+       case SIOCSPWE3NEIGHBOR:
+               error = mpw_set_neighbor(sc, (struct if_laddrreq *)data);
+               break;
+       case SIOCGPWE3NEIGHBOR:
+               error = mpw_get_neighbor(sc, (struct if_laddrreq *)data);
+               break;
+       case SIOCDPWE3NEIGHBOR:
+               error = mpw_del_neighbor(sc);
+               break;
 
        case SIOCGETLABEL:
-               shim.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
-               error = copyout(&shim, ifr->ifr_data, sizeof(shim));
+               error = mpw_get_label(sc, ifr);
                break;
        case SIOCSETLABEL:
-               if ((error = copyin(ifr->ifr_data, &shim, sizeof(shim))))
-                       break;
-               if (shim.shim_label > MPLS_LABEL_MAX ||
-                   shim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
-                       error = EINVAL;
+               error = copyin(ifr->ifr_data, &shim, sizeof(shim));
+               if (error != 0)
                        break;
-               }
-               shim.shim_label = MPLS_LABEL2SHIM(shim.shim_label);
-               rw_enter_write(&sc->sc_lock);
-               if (sc->sc_smpls.smpls_label != shim.shim_label) {
-                       error = mpw_set_label(sc, shim.shim_label,
-                           sc->sc_rdomain);
-               }
-               rw_exit_write(&sc->sc_lock);
+               error = mpw_set_label(sc, &shim);
                break;
 
        case SIOCSETMPWCFG:
-               error = suser(curproc);
-               if (error != 0)
-                       break;
-
-               error = copyin(ifr->ifr_data, &imr, sizeof(imr));
-               if (error != 0)
-                       break;
-
-               /* Teardown all configuration if got no nexthop */
-               sin = (struct sockaddr_in *) &imr.imr_nexthop;
-               if (sin->sin_addr.s_addr == 0) {
-                       if (rt_ifa_del(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
-                           smplstosa(&sc->sc_smpls), 0) == 0)
-                               sc->sc_smpls.smpls_label = 0;
-
-                       memset(&sc->sc_rshim, 0, sizeof(sc->sc_rshim));
-                       memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop));
-                       sc->sc_cword = 0;
-                       sc->sc_type = 0;
-                       break;
-               }
-
-               /* Validate input */
-               if (sin->sin_family != AF_INET ||
-                   imr.imr_lshim.shim_label > MPLS_LABEL_MAX ||
-                   imr.imr_lshim.shim_label <= MPLS_LABEL_RESERVED_MAX ||
-                   imr.imr_rshim.shim_label > MPLS_LABEL_MAX ||
-                   imr.imr_rshim.shim_label <= MPLS_LABEL_RESERVED_MAX) {
-                       error = EINVAL;
-                       break;
-               }
-
-               /* Setup labels and create inbound route */
-               imr.imr_lshim.shim_label =
-                   MPLS_LABEL2SHIM(imr.imr_lshim.shim_label);
-               imr.imr_rshim.shim_label =
-                   MPLS_LABEL2SHIM(imr.imr_rshim.shim_label);
-
-               rw_enter_write(&sc->sc_lock);
-               if (sc->sc_smpls.smpls_label != imr.imr_lshim.shim_label) {
-                       error = mpw_set_label(sc, imr.imr_lshim.shim_label,
-                           sc->sc_rdomain);
-               }
-               rw_exit_write(&sc->sc_lock);
-               if (error != 0)
-                       break;
-
-               /* Apply configuration */
-               sc->sc_cword = ISSET(imr.imr_flags, IMR_FLAG_CONTROLWORD);
-               sc->sc_type = imr.imr_type;
-               sc->sc_rshim.shim_label = imr.imr_rshim.shim_label;
-
-               memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop));
-               sin_nexthop = (struct sockaddr_in *) &sc->sc_nexthop;
-               sin_nexthop->sin_family = sin->sin_family;
-               sin_nexthop->sin_len = sizeof(struct sockaddr_in);
-               sin_nexthop->sin_addr.s_addr = sin->sin_addr.s_addr;
+               error = mpw_set_config(sc, ifr);
                break;
 
        case SIOCGETMPWCFG:
-               imr.imr_flags = sc->sc_cword ? IMR_FLAG_CONTROLWORD : 0;
-               imr.imr_type = sc->sc_type;
-               imr.imr_lshim.shim_label =
-                   MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label);
-               imr.imr_rshim.shim_label =
-                   MPLS_SHIM2LABEL(sc->sc_rshim.shim_label);
-               memcpy(&imr.imr_nexthop, &sc->sc_nexthop,
-                   sizeof(imr.imr_nexthop));
-
-               error = copyout(&imr, ifr->ifr_data, sizeof(imr));
+               error = mpw_get_config(sc, ifr);
                break;
 
        case SIOCSLIFPHYRTABLE:
@@ -297,18 +455,15 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd,
                        error = EINVAL;
                        break;
                }
-               rw_enter_write(&sc->sc_lock);
                if (sc->sc_rdomain != ifr->ifr_rdomainid) {
-                       error = mpw_set_label(sc, sc->sc_smpls.smpls_label,
+                       error = mpw_set_route(sc, sc->sc_smpls.smpls_label,
                            ifr->ifr_rdomainid);
                }
-               rw_exit_write(&sc->sc_lock);
                break;
        case SIOCGLIFPHYRTABLE:
                ifr->ifr_rdomainid = sc->sc_rdomain;
                break;
 
-
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                break;
@@ -440,20 +595,22 @@ mpw_start(struct ifnet *ifp)
        struct ifnet *ifp0;
        struct mbuf *m, *m0;
        struct shim_hdr *shim;
+       struct mpw_neighbor *n;
        struct sockaddr_mpls smpls = {
                .smpls_len = sizeof(smpls),
                .smpls_family = AF_MPLS,
        };
        uint32_t bos;
 
+       n = sc->sc_neighbor;
        if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
-           sc->sc_rshim.shim_label == 0 ||
-           sc->sc_type == IMR_TYPE_NONE) {
+           sc->sc_type == IMR_TYPE_NONE ||
+           n == NULL) {
                IFQ_PURGE(&ifp->if_snd);
                return;
        }
 
-       rt = rtalloc(sstosa(&sc->sc_nexthop), RT_RESOLVE, sc->sc_rdomain);
+       rt = rtalloc(sstosa(&n->n_nexthop), RT_RESOLVE, sc->sc_rdomain);
        if (!rtisvalid(rt)) {
                IFQ_PURGE(&ifp->if_snd);
                goto rtfree;
@@ -515,7 +672,7 @@ mpw_start(struct ifnet *ifp)
 
                shim = mtod(m0, struct shim_hdr *);
                shim->shim_label = htonl(mpls_defttl) & MPLS_TTL_MASK;
-               shim->shim_label |= sc->sc_rshim.shim_label | bos;
+               shim->shim_label |= n->n_rshim.shim_label | bos;
 
                m0->m_pkthdr.ph_rtableid = ifp->if_rdomain;
 
Index: sbin/ifconfig/ifconfig.c
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.392
diff -u -p -r1.392 ifconfig.c
--- sbin/ifconfig/ifconfig.c    19 Feb 2019 08:12:30 -0000      1.392
+++ sbin/ifconfig/ifconfig.c    25 Feb 2019 09:58:15 -0000
@@ -242,7 +242,14 @@ void       clone_create(const char *, int);
 void   clone_destroy(const char *, int);
 void   unsetmediaopt(const char *, int);
 void   setmediainst(const char *, int);
-void   setmpelabel(const char *, int);
+void   setmplslabel(const char *, int);
+void   unsetmplslabel(const char *, int);
+void   setpwe3cw(const char *, int);
+void   unsetpwe3cw(const char *, int);
+void   setpwe3fat(const char *, int);
+void   unsetpwe3fat(const char *, int);
+void   setpwe3neighbor(const char *, const char *);
+void   unsetpwe3neighbor(const char *, int);
 void   process_mpw_commands(void);
 void   setmpwencap(const char *, int);
 void   setmpwlabel(const char *, const char *);
@@ -251,7 +258,7 @@ void        setmpwcontrolword(const char *, int
 void   setvlantag(const char *, int);
 void   setvlandev(const char *, int);
 void   unsetvlandev(const char *, int);
-void   mpe_status(void);
+void   mpls_status(void);
 void   mpw_status(void);
 void   setrdomain(const char *, int);
 void   unsetrdomain(const char *, int);
@@ -457,7 +464,14 @@ const struct       cmd {
        { "-staticarp", -IFF_STATICARP, 0,              setifflags },
        { "mpls",       IFXF_MPLS,      0,              setifxflags },
        { "-mpls",      -IFXF_MPLS,     0,              setifxflags },
-       { "mplslabel",  NEXTARG,        0,              setmpelabel },
+       { "mplslabel",  NEXTARG,        0,              setmplslabel },
+       { "-mplslabel", 0,              0,              unsetmplslabel },
+       { "pwecw",      0,              0,              setpwe3cw },
+       { "-pwecw",     0,              0,              unsetpwe3cw },
+       { "pwefat",     0,              0,              setpwe3fat },
+       { "-pwefat",    0,              0,              unsetpwe3fat },
+       { "pweneighbor", NEXTARG2,      0,              NULL, setpwe3neighbor },
+       { "-pweneighbor", 0,            0,              unsetpwe3neighbor },
        { "mpwlabel",   NEXTARG2,       0,              NULL, setmpwlabel },
        { "neighbor",   NEXTARG,        0,              setmpwneighbor },
        { "controlword", 1,             0,              setmpwcontrolword },
@@ -3293,7 +3307,7 @@ status(int link, struct sockaddr_dl *sdl
        pfsync_status();
        pppoe_status();
        sppp_status();
-       mpe_status();
+       mpls_status();
        mpw_status();
        pflow_status();
        umb_status();
@@ -3821,16 +3835,97 @@ delvnetflowid(const char *ignored, int a
 }
 
 void
-mpe_status(void)
+pwe3_neighbor(void)
+{
+       const char *prefix = "pwe3 remote label";
+       struct if_laddrreq req;
+       char hbuf[NI_MAXHOST];
+       struct sockaddr_mpls *smpls;
+       int error;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.iflr_name, name, sizeof(req.iflr_name)) >=
+           sizeof(req.iflr_name))
+               errx(1, "pwe3 neighbor: name is too long");
+
+       if (ioctl(s, SIOCGPWE3NEIGHBOR, &req) == -1) {
+               if (errno != EADDRNOTAVAIL)
+                       return;
+
+               printf(" %s (unset)", prefix);
+               return;
+       }
+
+       if (req.dstaddr.ss_family != AF_MPLS) {
+               warnc(EPFNOSUPPORT, "pwe3 neighbor");
+               return;
+       }
+       smpls = (struct sockaddr_mpls *)&req.dstaddr;
+
+       error = getnameinfo((struct sockaddr *)&req.addr, sizeof(req.addr),
+           hbuf, sizeof(hbuf), NULL, 0, NI_NUMERICHOST);
+       if (error != 0) {
+               warnx("%s: %s", prefix, gai_strerror(error));
+               return;
+       }
+
+       printf(" %s %u on %s", prefix, smpls->smpls_label, hbuf);
+}
+
+void
+pwe3_cword(void)
+{
+       struct ifreq req;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.ifr_name, name, sizeof(req.ifr_name)) >=
+           sizeof(req.ifr_name))
+               errx(1, "pwe3 control word: name is too long");
+
+       if (ioctl(s, SIOCGPWE3CTRLWORD, &req) == -1) {
+               return;
+       }
+
+       printf(" %s", req.ifr_pwe3 ? "cw" : "nocw");
+}
+
+void
+pwe3_fword(void)
+{
+       struct ifreq req;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.ifr_name, name, sizeof(req.ifr_name)) >=
+           sizeof(req.ifr_name))
+               errx(1, "pwe3 control word: name is too long");
+
+       if (ioctl(s, SIOCGPWE3FAT, &req) == -1)
+               return;
+
+       printf(" %s", req.ifr_pwe3 ? "fat" : "nofat");
+}
+
+void
+mpls_status(void)
 {
        struct shim_hdr shim;
 
        bzero(&shim, sizeof(shim));
        ifr.ifr_data = (caddr_t)&shim;
 
-       if (ioctl(s, SIOCGETLABEL , (caddr_t)&ifr) == -1)
-               return;
-       printf("\tmpls label: %d\n", shim.shim_label);
+       if (ioctl(s, SIOCGETLABEL, (caddr_t)&ifr) == -1) {
+               if (errno != EADDRNOTAVAIL)
+                       return;
+
+               printf("\tmpls: label (unset)");
+       } else
+               printf("\tmpls: label %u", shim.shim_label);
+
+       pwe3_neighbor();
+       pwe3_cword();
+       pwe3_fword();
+
+       printf("\n");
 }
 
 void
@@ -3885,7 +3980,7 @@ mpw_status(void)
 
 /* ARGSUSED */
 void
-setmpelabel(const char *val, int d)
+setmplslabel(const char *val, int d)
 {
        struct shim_hdr  shim;
        const char      *estr;
@@ -3898,6 +3993,114 @@ setmpelabel(const char *val, int d)
                errx(1, "mpls label %s is %s", val, estr);
        if (ioctl(s, SIOCSETLABEL, (caddr_t)&ifr) == -1)
                warn("SIOCSETLABEL");
+}
+
+void
+unsetmplslabel(const char *val, int d)
+{
+       struct ifreq req;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.ifr_name, name, sizeof(req.ifr_name)) >=
+           sizeof(req.ifr_name))
+               errx(1, "interface name is too long");
+
+       if (ioctl(s, SIOCDELLABEL, (caddr_t)&ifr) == -1)
+               warn("-mplslabel");
+}
+
+void
+setpwe3(unsigned long cmd, const char *cmdname, int value)
+{
+       struct ifreq req;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.ifr_name, name, sizeof(req.ifr_name)) >=
+           sizeof(req.ifr_name))
+               errx(1, "interface name is too long");
+
+       req.ifr_pwe3 = value;
+
+       if (ioctl(s, cmd, &req) == -1)
+               warn("%s", cmdname);
+}
+
+void
+setpwe3cw(const char *val, int d)
+{
+       setpwe3(SIOCSPWE3CTRLWORD, "pwecw", 1);
+}
+
+void
+unsetpwe3cw(const char *val, int d)
+{
+       setpwe3(SIOCSPWE3CTRLWORD, "-pwecw", 0);
+}
+
+void
+setpwe3fat(const char *val, int d)
+{
+       setpwe3(SIOCSPWE3FAT, "pwefat", 1);
+}
+
+void
+unsetpwe3fat(const char *val, int d)
+{
+       setpwe3(SIOCSPWE3FAT, "-pwefat", 0);
+}
+
+void
+setpwe3neighbor(const char *label, const char *neighbor)
+{
+       struct if_laddrreq req;
+       struct addrinfo hints, *res;
+       struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req.dstaddr;;
+       const char *errstr;
+       int error;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.iflr_name, name, sizeof(req.iflr_name)) >=
+           sizeof(req.iflr_name))
+               errx(1, "interface name is too long");
+
+       memset(&hints, 0, sizeof(hints));
+       hints.ai_family = AF_UNSPEC;
+       hints.ai_socktype = SOCK_DGRAM;
+       error = getaddrinfo(neighbor, NULL, &hints, &res);
+       if (error != 0)
+               errx(1, "pweneighbor %s: %s", neighbor, gai_strerror(error));
+
+       smpls->smpls_len = sizeof(*smpls);
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_label = strtonum(label,
+           (MPLS_LABEL_RESERVED_MAX + 1), MPLS_LABEL_MAX, &errstr);
+       if (errstr != NULL)
+               errx(1, "pweneighbor: invalid label: %s", errstr);
+
+
+       if (res->ai_addrlen > sizeof(req.addr))
+               errx(1, "pweneighbors: unexpected socklen");
+
+       memcpy(&req.addr, res->ai_addr, res->ai_addrlen);
+
+       freeaddrinfo(res);
+
+       if (ioctl(s, SIOCSPWE3NEIGHBOR, &req) == -1)
+               warn("pweneighbor");
+}
+
+void
+unsetpwe3neighbor(const char *val, int d)
+{
+       struct ifreq req;
+
+       memset(&req, 0, sizeof(req));
+       if (strlcpy(req.ifr_name, name, sizeof(req.ifr_name)) >=
+           sizeof(req.ifr_name))
+               errx(1, "interface name is too long");
+
+       if (ioctl(s, SIOCDPWE3NEIGHBOR, &req) == -1)
+               warn("-pweneighbor");
 }
 
 void

Reply via email to