On Wed, Jan 30, 2019 at 11:54:45AM -0200, Martin Pieuchot wrote:
> On 30/01/19(Wed) 11:48, David Gwynne wrote:
> > mpls uses AF_MPLS routes with RTF_LOCAL set on them to know which tags
> > are used as input for the mpe and mpw interfaces. setting this up
> > currently goes through rt_ifa_add, but that has a couple of features
> > that are undesirable for mpls.
> > 
> > Firstly, rt_ifa_add unconditionally sets RTF_MPATH on the routes it
> > adds, which means multiple mpe and mpw interfaces can "own" the same
> > input tag. mpe tries to work around this by maintaining a global list of
> > mpe interfaces, and iterates over them when a new label is added. That's
> > ok (sort of) for mpe, but it doesnt take the tags used by mpw into
> > account.
> 
> I don't understand how this work, where are these 'tag' and 'label'
> stored?  What do you mean with "'own' the same input tag"?

I'm using tag and label interchangably here, but it refers to the number
that appears in the MPLS shim header on the wire. Each label is supposed
to represent a single "forwarding equivalence class", ie, each MPLS label
is only supposed to do one thing. When we're talkign about mpe and mpw
input, we're talking about configuring each interface with a local
MPLS label. Packets sent to the label on the local machine should
end up as packets coming in on an mpe or mpw interface.

Right now mpw and mpe use rt_ifa_add() to claim ownership of a local
tag. Once something is using a tag, nothing else should be able to add
an entry to the mpls rtable with that same tag. Because rt_ifa_add()
adds RTF_MPATH unconditionally, it is possible to add two things with
the same label to the mpls rtable.

> What is the current 'gateway' value of MPLS RTF_LOCAL routes? 

It's the struct sockaddr_dl *if_sadl in struct ifnet.

> > Secondly, I'd like to start pulling apart the restriction on the use of
> > mpls only in rdomain 1. rt_ifa_add doesn't help this situation because
> > it assumes that we're adding a route inside the rdomain the interface is
> > in, rather than the one it tunnels in. Changing this assumption means
> > forking rt_ifa_add, and oh look, that's what I've started here.
> 
> I'm afraid of adding new rtrequest(9) calls, especially with new error
> conditions.  Why can't you add the rdomain of the tunnel?

ifp->if_rdomain refers to the rdomain of the traffic going over an
interface. For tunnels it is the rdomain of packets inside the
tunnel. Another way of saying it is that a tunnel provides an overlay
on top of an underlay network, and ifp->if_rdomain is the rdomain
of the overlay traffic.

When we're adding the mpls rtable entries we're specifying connectivity
outside the tunnel, aka routes in the network underlay. Currently
rt_ifa_{add,del} force anything with RTF_MPLS into rdomain 0 despite
what the overlay is, and despite where you might want to run the
underlay. One of the configurations claudio@ and I talked about for
the firewalls at work was running MPLS in rdomain 1 and leaving our
current config running in rdomain 0. This model is not currently
possible with RTF_MPLS routes forced to the mpls rtable in rdomain 0.

> If all you need is remove RTF_MPATH, then do so :)
> 
> We can add it to all rt_ifa_add(9) calls to be explicit!

That won't solve the rdomain thing though.

How about making rt_ifa_{add,del} as wrappers around the thing that
let's you not specify RTF_MPATH, and explicitly configure the rdomain?
This effectively replaces rt_ifa_{add,del} with rt_ifa_{add,del}_rdomain
respectively, but provides the old names as wrappers on the new one.

At least it keeps everything in one place...

I'm not wedded to the names, it was just the least worst I came up with
inbetween meetings.

Index: route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.379
diff -u -p -r1.379 route.c
--- route.c     23 Nov 2018 16:24:11 -0000      1.379
+++ route.c     31 Jan 2019 03:00:37 -0000
@@ -1032,17 +1032,24 @@ rt_maskedcopy(struct sockaddr *src, stru
 int
 rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst)
 {
+       return (rt_ifa_add_rdomain(ifa, flags | RTF_MPATH, dst,
+            ifa->ifa_ifp->if_rdomain));
+}
+
+int
+rt_ifa_add_rdomain(struct ifaddr *ifa, int flags, struct sockaddr *dst,
+    unsigned int rtableid)
+{
        struct ifnet            *ifp = ifa->ifa_ifp;
        struct rtentry          *rt;
        struct sockaddr_rtlabel  sa_rl;
        struct rt_addrinfo       info;
-       unsigned int             rtableid = ifp->if_rdomain;
        uint8_t                  prio = ifp->if_priority + RTP_STATIC;
        int                      error;
 
        memset(&info, 0, sizeof(info));
        info.rti_ifa = ifa;
-       info.rti_flags = flags | RTF_MPATH;
+       info.rti_flags = flags;
        info.rti_info[RTAX_DST] = dst;
        if (flags & RTF_LLINFO)
                info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
@@ -1051,11 +1058,8 @@ rt_ifa_add(struct ifaddr *ifa, int flags
        info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);
 
 #ifdef MPLS
-       if ((flags & RTF_MPLS) == RTF_MPLS) {
+       if ((flags & RTF_MPLS) == RTF_MPLS)
                info.rti_mpls = MPLS_OP_POP;
-               /* MPLS routes only exist in rdomain 0 */
-               rtableid = 0;
-       }
 #endif /* MPLS */
 
        if ((flags & RTF_HOST) == 0)
@@ -1085,21 +1089,22 @@ rt_ifa_add(struct ifaddr *ifa, int flags
 int
 rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst)
 {
+       return (rt_ifa_del_rdomain(ifa, flags, dst,
+           ifa->ifa_ifp->if_rdomain));
+}
+
+int
+rt_ifa_del_rdomain(struct ifaddr *ifa, int flags, struct sockaddr *dst,
+    unsigned int rtableid)
+{
        struct ifnet            *ifp = ifa->ifa_ifp;
        struct rtentry          *rt;
        struct mbuf             *m = NULL;
        struct sockaddr         *deldst;
        struct rt_addrinfo       info;
        struct sockaddr_rtlabel  sa_rl;
-       unsigned int             rtableid = ifp->if_rdomain;
        uint8_t                  prio = ifp->if_priority + RTP_STATIC;
        int                      error;
-
-#ifdef MPLS
-       if ((flags & RTF_MPLS) == RTF_MPLS)
-               /* MPLS routes only exist in rdomain 0 */
-               rtableid = 0;
-#endif /* MPLS */
 
        if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
                m = m_get(M_DONTWAIT, MT_SONAME);
Index: route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.173
diff -u -p -r1.173 route.h
--- route.h     12 Nov 2018 16:36:54 -0000      1.173
+++ route.h     31 Jan 2019 03:00:37 -0000
@@ -456,7 +456,11 @@ void        rtref(struct rtentry *);
 void    rtfree(struct rtentry *);
 
 int     rt_ifa_add(struct ifaddr *, int, struct sockaddr *);
+int     rt_ifa_add_rdomain(struct ifaddr *, int, struct sockaddr *,
+            unsigned int);
 int     rt_ifa_del(struct ifaddr *, int, struct sockaddr *);
+int     rt_ifa_del_rdomain(struct ifaddr *, int, struct sockaddr *,
+            unsigned int);
 void    rt_ifa_purge(struct ifaddr *);
 int     rt_ifa_addlocal(struct ifaddr *);
 int     rt_ifa_dellocal(struct ifaddr *);
Index: if_mpe.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpe.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_mpe.c
--- if_mpe.c    31 Jan 2019 02:00:27 -0000      1.77
+++ if_mpe.c    31 Jan 2019 03:00:37 -0000
@@ -133,8 +133,8 @@ mpe_clone_destroy(struct ifnet *ifp)
        LIST_REMOVE(sc, sc_list);
 
        if (sc->sc_smpls.smpls_label) {
-               rt_ifa_del(&sc->sc_ifa, RTF_MPLS,
-                   smplstosa(&sc->sc_smpls));
+               rt_ifa_del_rdomain(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                   smplstosa(&sc->sc_smpls), 0);
        }
 
        if_detach(ifp);
@@ -329,13 +329,13 @@ mpe_ioctl(struct ifnet *ifp, u_long cmd,
                        break;
                if (sc->sc_smpls.smpls_label) {
                        /* remove old MPLS route */
-                       rt_ifa_del(&sc->sc_ifa, RTF_MPLS,
-                           smplstosa(&sc->sc_smpls));
+                       rt_ifa_del_rdomain(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                           smplstosa(&sc->sc_smpls), 0);
                }
                /* add new MPLS route */
                sc->sc_smpls.smpls_label = shim.shim_label;
-               error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
-                   smplstosa(&sc->sc_smpls));
+               error = rt_ifa_add_rdomain(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                   smplstosa(&sc->sc_smpls), 0);
                if (error) {
                        sc->sc_smpls.smpls_label = 0;
                        break;
@@ -346,8 +346,9 @@ mpe_ioctl(struct ifnet *ifp, u_long cmd,
                /* XXX does not make sense, the MPLS route is on rtable 0 */
                if (ifr->ifr_rdomainid != ifp->if_rdomain) {
                        if (sc->sc_smpls.smpls_label) {
-                               rt_ifa_add(&sc->sc_ifa, RTF_MPLS,
-                                   smplstosa(&sc->sc_smpls));
+                               rt_ifa_add_rdomain(&sc->sc_ifa,
+                                   RTF_MPLS | RTF_LOCAL,
+                                   smplstosa(&sc->sc_smpls), 0);
                        }
                }
                /* return with ENOTTY so that the parent handler finishes */
Index: if_mpw.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpw.c,v
retrieving revision 1.31
diff -u -p -r1.31 if_mpw.c
--- if_mpw.c    30 Jan 2019 01:09:36 -0000      1.31
+++ if_mpw.c    31 Jan 2019 03:00:37 -0000
@@ -116,8 +116,8 @@ mpw_clone_destroy(struct ifnet *ifp)
        ifp->if_flags &= ~IFF_RUNNING;
 
        if (sc->sc_smpls.smpls_label) {
-               rt_ifa_del(&sc->sc_ifa, RTF_MPLS,
-                   smplstosa(&sc->sc_smpls));
+               rt_ifa_del_rdomain(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                   smplstosa(&sc->sc_smpls), 0);
        }
 
        ether_ifdetach(ifp);
@@ -162,9 +162,9 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd,
                /* Teardown all configuration if got no nexthop */
                sin = (struct sockaddr_in *) &imr.imr_nexthop;
                if (sin->sin_addr.s_addr == 0) {
-                       if (rt_ifa_del(&sc->sc_ifa, RTF_MPLS,
-                           smplstosa(&sc->sc_smpls)) == 0)
-                               sc->sc_smpls.smpls_label = 0;
+                       rt_ifa_del_rdomain(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL,
+                           smplstosa(&sc->sc_smpls), 0);
+                       sc->sc_smpls.smpls_label = 0;
 
                        memset(&sc->sc_rshim, 0, sizeof(sc->sc_rshim));
                        memset(&sc->sc_nexthop, 0, sizeof(sc->sc_nexthop));
@@ -190,13 +190,16 @@ mpw_ioctl(struct ifnet *ifp, u_long cmd,
                    MPLS_LABEL2SHIM(imr.imr_rshim.shim_label);
 
                if (sc->sc_smpls.smpls_label != imr.imr_lshim.shim_label) {
-                       if (sc->sc_smpls.smpls_label)
-                               rt_ifa_del(&sc->sc_ifa, RTF_MPLS,
-                                   smplstosa(&sc->sc_smpls));
+                       if (sc->sc_smpls.smpls_label) {
+                               rt_ifa_del_rdomain(&sc->sc_ifa,
+                                   RTF_MPLS | RTF_LOCAL,
+                                   smplstosa(&sc->sc_smpls), 0);
+                       }
 
                        sc->sc_smpls.smpls_label = imr.imr_lshim.shim_label;
-                       error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS|RTF_LOCAL,
-                           smplstosa(&sc->sc_smpls));
+                       error = rt_ifa_add_rdomain(&sc->sc_ifa,
+                           RTF_MPLS | RTF_LOCAL,
+                           smplstosa(&sc->sc_smpls), 0);
                        if (error != 0) {
                                sc->sc_smpls.smpls_label = 0;
                                break;

Reply via email to