On Mon, May 24, 2010 at 05:23:00PM +0700, Insan Praja SW wrote:
> Hi Misc@,
> Before I begin to test OpenBGPD mpls VPN support on current, is
> there any hints on route-leaking, and an example/hints to make a
> complete setup MPLS cloud and MPLS/VPN on a network.
> 
> In my later experiences using OpenBSD, I use pf with rtable to make
> a VPN-like network without isolation on the network. Now I need to
> know if there are ways to have a semi-isolated network when using
> rdomain or anything like it.

Passing traffic between VPNs is either done in pf(4) by setting the rtable
on a rule or by importing routes in BGP (import/export-target).
The first method is much more flexible but more static.

First of all you need the attached diff to play with the kernel MPLS part.
With that in you can start playing with the various parts.
1. You need to enable MPLS on the interfaces that do MPLS
   In my test I use a vlan for this:
        # more /etc/hostname.vlan2003 
        vlan 2003 vlandev sis0
        inet 10.83.128.26 255.255.255.248 NONE
        mpls

2. Then it is best to have a loopback interface:
        # more /etc/hostname.lo1
        inet 10.83.66.23 255.255.255.255 NONE

3. LDP config:
        router-id 10.83.66.23
        distribution independent
        retention liberal
        advertisement unsolicited
        interface lo1 {
        }
        interface vlan2003 {
        }

4. I use ospfd as IGP, there is nothing special needed here.

5. create a rdomain 1:
        # more /etc/hostname.vlan2017
        rdomain 1
        vlan 2017 vlandev sis0
        inet 192.168.220.1 255.255.255.0

6. create a mpe(4) in rdomain 1:
        # more /etc/hostname.mpe0
        rdomain 1 mplslabel 543
        inet 10.83.66.129 255.255.255.255

Note: it is necessary to have an IP on mpe(4) but it does not matter which
one you pick. I normally use the loopback IP but maybe using the vlan2017
IP would be smarter.

7. BGP config:
        AS 65003
        router-id 10.83.66.23
        listen on 10.83.66.23
        rdomain 1 {
                descr "CUSTOMER1"
                rd 65003:1
                import-target rt 65003:1
                export-target rt 65003:1
                depend on mpe0
                network 192.168.220/24
        }
        group ibgp {
                announce IPv4 unicast
                announce IPv4 vpn
                remote-as 65003
                local-address 10.83.66.23
                neighbor 10.83.66.2 {
                        descr c2
                }
        }

Start ospfd, bgpd, and ldpd and hope for the best (check that all sessions
come up). Set up something similar on a second system.
Use e.g. ping -V1 -I 192.168.220.1 192.168.221.1 to test the VPN.

It is possible to use gif/gre instead of LDP -- just use a gre interface
in point 1 and skip everything that needs LDP.

-- 
:wq Claudio

Index: sbin/ifconfig/ifconfig.8
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.200
diff -u -p -r1.200 ifconfig.8
--- sbin/ifconfig/ifconfig.8    7 May 2010 06:17:34 -0000       1.200
+++ sbin/ifconfig/ifconfig.8    24 May 2010 12:48:34 -0000
@@ -347,6 +347,11 @@ this directive is used to select between
 and 802.11g
 .Pq Dq 11g
 operating modes.
+.It Cm mpls
+Enable Multiprotocol Label Switching (MPLS) on the interface. It will be
+able to send and receive MPLS traffic.
+.It Fl mpls
+Disable MPLS on the interface.
 .It Cm mtu Ar value
 Set the MTU for this device to the given
 .Ar value .
Index: sbin/ifconfig/ifconfig.c
===================================================================
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.232
diff -u -p -r1.232 ifconfig.c
--- sbin/ifconfig/ifconfig.c    6 May 2010 12:58:40 -0000       1.232
+++ sbin/ifconfig/ifconfig.c    6 May 2010 20:34:51 -0000
@@ -191,6 +191,7 @@ void        unsetmediaopt(const char *, int);
 void   setmediainst(const char *, int);
 void   settimeslot(const char *, int);
 void   timeslot_status(void);
+void   setifmpls(const char *, int);
 void   setmpelabel(const char *, int);
 void   setvlantag(const char *, int);
 void   setvlanprio(const char *, int);
@@ -346,6 +347,8 @@ const struct        cmd {
        { "-rtlabel",   -1,             0,              setifrtlabel },
        { "range",      NEXTARG,        0,              setatrange },
        { "phase",      NEXTARG,        0,              setatphase },
+       { "mpls",       IFXF_MPLS,      0,              setifxflags },
+       { "-mpls",      -IFXF_MPLS,     0,              setifxflags },
        { "mplslabel",  NEXTARG,        0,              setmpelabel },
        { "advbase",    NEXTARG,        0,              setcarp_advbase },
        { "advskew",    NEXTARG,        0,              setcarp_advskew },
@@ -3252,6 +3255,7 @@ mpe_status(void)
        printf("\tmpls label: %d\n", shim.shim_label);
 }
 
+/* ARGSUSED */
 void
 setmpelabel(const char *val, int d)
 {
Index: sys/conf/GENERIC
===================================================================
RCS file: /cvs/src/sys/conf/GENERIC,v
retrieving revision 1.156
diff -u -p -r1.156 GENERIC
--- sys/conf/GENERIC    7 May 2010 13:16:18 -0000       1.156
+++ sys/conf/GENERIC    7 May 2010 13:38:24 -0000
@@ -68,7 +68,7 @@ option                PPP_DEFLATE
 #option                PIPEX           # Pppac IP EXtension, for npppd
 option         MROUTING        # Multicast router
 #option                PIM             # Protocol Independent Multicast
-#option                MPLS            # Multi-Protocol Label Switching
+option         MPLS            # Multi-Protocol Label Switching
 
 #mpath0                at root         # SCSI Multipathing
 #scsibus*      at mpath?
@@ -101,7 +101,7 @@ pseudo-device       carp            # CARP protocol supp
 pseudo-device  gif             # IPv[46] over IPv[46] tunnel (RFC1933)
 pseudo-device  gre             # GRE encapsulation interface
 pseudo-device  loop            # network loopback
-#pseudo-device mpe             # MPLS PE interface
+pseudo-device  mpe             # MPLS PE interface
 pseudo-device  ppp             # PPP
 pseudo-device  pppoe           # PPP over Ethernet (RFC 2516)
 pseudo-device  sl              # CSLIP
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.215
diff -u -p -r1.215 if.c
--- sys/net/if.c        8 May 2010 11:07:20 -0000       1.215
+++ sys/net/if.c        12 May 2010 08:42:56 -0000
@@ -107,6 +107,10 @@
 #include <netinet6/ip6_var.h>
 #endif
 
+#ifdef MPLS
+#include <netmpls/mpls.h>
+#endif
+
 #if NBPFILTER > 0
 #include <net/bpf.h>
 #endif
@@ -1345,6 +1349,26 @@ ifioctl(struct socket *so, u_long cmd, c
                        }
                }
 #endif
+
+#ifdef MPLS
+               if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
+                   !ISSET(ifp->if_xflags, IFXF_MPLS)) {
+                       int s = splnet();
+                       ifp->if_xflags |= IFXF_MPLS;
+                       ifp->if_ll_output = ifp->if_output; 
+                       ifp->if_output = mpls_output;
+                       splx(s);
+               }
+               if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
+                   !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
+                       int s = splnet();
+                       ifp->if_xflags &= ~IFXF_MPLS;
+                       ifp->if_output = ifp->if_ll_output; 
+                       ifp->if_ll_output = NULL;
+                       splx(s);
+               }
+#endif
+
 
                ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
                        (ifr->ifr_flags &~ IFXF_CANTCHANGE);
Index: sys/net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.115
diff -u -p -r1.115 if.h
--- sys/net/if.h        17 Apr 2010 17:46:32 -0000      1.115
+++ sys/net/if.h        11 May 2010 11:49:48 -0000
@@ -266,6 +266,10 @@ struct ifnet {                             /* and the 
entries */
                                        /* output routine (enqueue) */
        int     (*if_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
                     struct rtentry *);
+
+                                       /* link level output function */
+       int     (*if_ll_output)(struct ifnet *, struct mbuf *,
+                   struct sockaddr *, struct rtentry *);
                                        /* initiate output routine */
        void    (*if_start)(struct ifnet *);
                                        /* ioctl routine */
@@ -326,6 +330,7 @@ struct ifnet {                              /* and the 
entries */
 #define IFXF_TXREADY           0x1             /* interface is ready to tx */
 #define        IFXF_NOINET6            0x2             /* don't do inet6 */
 #define        IFXF_INET6_PRIVACY      0x4             /* autoconf privacy 
extension */
+#define        IFXF_MPLS               0x8             /* supports MPLS */
 
 #define        IFXF_CANTCHANGE \
        (IFXF_TXREADY)
Index: sys/net/if_ethersubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.142
diff -u -p -r1.142 if_ethersubr.c
--- sys/net/if_ethersubr.c      7 May 2010 13:33:16 -0000       1.142
+++ sys/net/if_ethersubr.c      7 May 2010 13:34:21 -0000
@@ -277,12 +277,7 @@ ether_output(ifp0, m0, dst, rt0)
                        else
                                senderr(EHOSTUNREACH);
                }
-#ifdef MPLS
-               if (rt->rt_flags & RTF_MPLS) {
-                       if ((m = mpls_output(m, rt)) == NULL)
-                               senderr(EHOSTUNREACH);
-               }
-#endif
+
                if (rt->rt_flags & RTF_GATEWAY) {
                        if (rt->rt_gwroute == 0)
                                goto lookup;
@@ -299,7 +294,6 @@ ether_output(ifp0, m0, dst, rt0)
                            time_second < rt->rt_rmx.rmx_expire)
                                senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
        }
-
        switch (dst->sa_family) {
 
 #ifdef INET
@@ -310,12 +304,7 @@ ether_output(ifp0, m0, dst, rt0)
                if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
                    !m->m_pkthdr.pf.routed)
                        mcopy = m_copy(m, 0, (int)M_COPYALL);
-#ifdef MPLS
-               if (rt0 != NULL && rt0->rt_flags & RTF_MPLS)
-                       etype = htons(ETHERTYPE_MPLS);
-               else
-#endif
-                       etype = htons(ETHERTYPE_IP);
+               etype = htons(ETHERTYPE_IP);
                break;
 #endif
 #ifdef INET6
@@ -382,6 +371,9 @@ ether_output(ifp0, m0, dst, rt0)
                else
                        senderr(EHOSTUNREACH);
 
+               if (!ISSET(ifp->if_xflags, IFXF_MPLS))
+                       senderr(ENETUNREACH);
+
                switch (dst->sa_family) {
                        case AF_LINK:
                                if (((struct sockaddr_dl *)dst)->sdl_alen <
@@ -490,7 +482,6 @@ ether_output(ifp0, m0, dst, rt0)
                }
        }
 #endif
-
        mflags = m->m_flags;
        len = m->m_pkthdr.len;
        s = splnet();
Index: sys/net/if_mpe.c
===================================================================
RCS file: /cvs/src/sys/net/if_mpe.c,v
retrieving revision 1.18
diff -u -p -r1.18 if_mpe.c
--- sys/net/if_mpe.c    9 Jan 2010 20:29:42 -0000       1.18
+++ sys/net/if_mpe.c    19 May 2010 07:46:51 -0000
@@ -64,6 +64,7 @@ int   mpeioctl(struct ifnet *, u_long, cad
 void   mpestart(struct ifnet *);
 int    mpe_clone_create(struct if_clone *, int);
 int    mpe_clone_destroy(struct ifnet *);
+int    mpe_newlabel(struct ifnet *, int, struct shim_hdr *);
 
 LIST_HEAD(, mpe_softc) mpeif_list;
 struct if_clone        mpe_cloner =
@@ -90,7 +91,7 @@ mpe_clone_create(struct if_clone *ifc, i
            M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
                return (ENOMEM);
 
-       mpeif->sc_shim.shim_label = MPLS_BOS_MASK | htonl(mpls_defttl);
+       mpeif->sc_shim.shim_label = 0;
        mpeif->sc_unit = unit;
        ifp = &mpeif->sc_if;
        snprintf(ifp->if_xname, sizeof ifp->if_xname, "mpe%d", unit);
@@ -107,7 +108,7 @@ mpe_clone_create(struct if_clone *ifc, i
        if_attach(ifp);
        if_alloc_sadl(ifp);
 #if NBPFILTER > 0
-       bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, MPE_HDRLEN);
+       bpfattach(&ifp->if_bpf, ifp, DLT_NULL, sizeof(u_int32_t));
 #endif
 
        s = splnet();
@@ -132,6 +133,7 @@ mpe_clone_destroy(struct ifnet *ifp)
        return (0);
 }
 
+struct sockaddr_storage         mpedst;
 /*
  * Start output on the mpe interface.
  */
@@ -139,9 +141,10 @@ void
 mpestart(struct ifnet *ifp)
 {
        struct mbuf             *m;
-       struct mpe_softc        *ifm;
-       struct shim_hdr          shim;
+       struct sockaddr         *sa = (struct sockaddr *)&mpedst;
        int                      s;
+       sa_family_t              af;
+       struct rtentry          *rt;
 
        for (;;) {
                s = splnet();
@@ -151,30 +154,46 @@ mpestart(struct ifnet *ifp)
                if (m == NULL)
                        return;
 
-#ifdef DIAGNOSTIC
-               if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.rdomain)) {
-                       printf("%s: trying to send packet on wrong domain. "
-                           "if %d vs. mbuf %d\n", ifp->if_xname,
-                           ifp->if_rdomain, rtable_l2(m->m_pkthdr.rdomain));
+               af = *mtod(m, sa_family_t *);
+               m_adj(m, sizeof(af));
+               switch (af) {
+               case AF_INET:
+                       bzero(sa, sizeof(struct sockaddr_in));
+                       satosin(sa)->sin_family = af;
+                       satosin(sa)->sin_len = sizeof(struct sockaddr_in);
+                       bcopy(mtod(m, caddr_t), &satosin(sa)->sin_addr,
+                           sizeof(in_addr_t));
+                       m_adj(m, sizeof(in_addr_t));
+                       break;
+               default:
+                       m_freem(m);
+                       continue;
                }
-#endif
 
-#if NBPFILTER > 0
-               if (ifp->if_bpf)
-                       bpf_mtap_af(ifp->if_bpf, AF_INET, m, BPF_DIRECTION_OUT);
-#endif
-               ifm = ifp->if_softc;
-               shim.shim_label = ifm->sc_shim.shim_label;
-               M_PREPEND(m, sizeof(shim), M_DONTWAIT);
-               m_copyback(m, 0, sizeof(shim), (caddr_t)&shim);
-               if (m == NULL) {
-                       ifp->if_ierrors++;
+               rt = rtalloc1(sa, RT_REPORT, 0);
+               if (rt == NULL) {
+                       /* no route give up */
+                       m_freem(m);
                        continue;
                }
-               m->m_pkthdr.rcvif = ifp;
-               /* XXX assumes MPLS is always in rdomain 0 */
-               m->m_pkthdr.rdomain = 0;
-               mpls_output(m, NULL);
+
+#if NBPFILTER > 0
+               if (ifp->if_bpf) {
+                       /* remove MPLS label before passing packet to bpf */
+                       m->m_data += sizeof(struct shim_hdr);
+                       m->m_len -= sizeof(struct shim_hdr);
+                       m->m_pkthdr.len -= sizeof(struct shim_hdr);
+                       bpf_mtap_af(ifp->if_bpf, af, m, BPF_DIRECTION_OUT);
+                       m->m_data -= sizeof(struct shim_hdr);
+                       m->m_len += sizeof(struct shim_hdr);
+                       m->m_pkthdr.len += sizeof(struct shim_hdr);
+               }
+#endif
+               /* XXX lie, but mpls_output will only look at sa_family */
+               sa->sa_family = AF_MPLS;
+
+               mpls_output(rt->rt_ifp, m, sa, rt);
+               RTFREE(rt);
        }
 }
 
@@ -182,25 +201,64 @@ int
 mpeoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
        struct rtentry *rt)
 {
-       int     s;
-       int     error;
+       struct shim_hdr shim;
+       int             s;
+       int             error;
+       int             off;
+       u_int8_t        op = 0;
+
+#ifdef DIAGNOSTIC
+       if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.rdomain)) {
+               printf("%s: trying to send packet on wrong domain. "
+                   "if %d vs. mbuf %d\n", ifp->if_xname,
+                   ifp->if_rdomain, rtable_l2(m->m_pkthdr.rdomain));
+       }
+#endif
+       m->m_pkthdr.rcvif = ifp;
+       /* XXX assumes MPLS is always in rdomain 0 */
+       m->m_pkthdr.rdomain = 0;
 
        error = 0;
        switch (dst->sa_family) {
+#ifdef INET
        case AF_INET:
+               if (rt && rt->rt_flags & RTF_MPLS) {
+                       shim.shim_label =
+                           ((struct rt_mpls *)rt->rt_llinfo)->mpls_label;
+                       shim.shim_label |= MPLS_BOS_MASK;
+                       op =  ((struct rt_mpls *)rt->rt_llinfo)->mpls_operation;
+               }
+               if (op != MPLS_OP_PUSH) {
+                       m_freem(m);
+                       error = ENETUNREACH;
+                       goto out;
+               }
+               if (mpls_mapttl_ip) {
+                       struct ip       *ip;
+                       ip = mtod(m, struct ip *);
+                       shim.shim_label |= htonl(ip->ip_ttl) & MPLS_TTL_MASK;
+               } else
+                       shim.shim_label |= htonl(mpls_defttl) & MPLS_TTL_MASK;
+               off = sizeof(sa_family_t) + sizeof(in_addr_t);
+               M_PREPEND(m, sizeof(shim) + off, M_DONTWAIT);
+               if (m == NULL) {
+                       m_freem(m);
+                       error = ENOBUFS;
+                       goto out;
+               }
+               *mtod(m, sa_family_t *) = AF_INET;
+               m_copyback(m, sizeof(sa_family_t), sizeof(in_addr_t),
+                   (caddr_t)&((satosin(dst)->sin_addr)));
                break;
-       case AF_MPLS:
-               /*
-                * drop MPLS packets entering here. This is a hack to prevent
-                * loops because of misconfiguration.
-                */
-               m_freem(m);
-               error = ENETUNREACH;
-               return (error);
+#endif
        default:
+               m_freem(m);
                error = ENETDOWN;
                goto out;
        }
+
+       m_copyback(m, off, sizeof(shim), (caddr_t)&shim);
+
        s = splnet();
        IFQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
        if (error) {
@@ -210,6 +268,7 @@ mpeoutput(struct ifnet *ifp, struct mbuf
        }
        if_start(ifp);
        splx(s);
+
 out:
        if (error)
                ifp->if_oerrors++;
@@ -224,13 +283,13 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
        struct mpe_softc        *ifm;
        struct ifreq            *ifr;
        struct shim_hdr          shim;
-       u_int32_t                ttl = htonl(mpls_defttl);
 
        ifr = (struct ifreq *)data;
        error = 0;
        switch (cmd) {
        case SIOCSIFADDR:
-               ifp->if_flags |= IFF_UP;
+               if (!ISSET(ifp->if_flags, IFF_UP))
+                       if_up(ifp);
                break;
        case SIOCSIFFLAGS:
                if (ifp->if_flags & IFF_UP)
@@ -261,8 +320,7 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
                        error = EINVAL;
                        break;
                }
-               shim.shim_label = (htonl(shim.shim_label << MPLS_LABEL_OFFSET))
-                   | MPLS_BOS_MASK | ttl;
+               shim.shim_label = htonl(shim.shim_label << MPLS_LABEL_OFFSET);
                if (ifm->sc_shim.shim_label == shim.shim_label)
                        break;
                LIST_FOREACH(ifm, &mpeif_list, sc_list) {
@@ -275,6 +333,14 @@ mpeioctl(struct ifnet *ifp, u_long cmd, 
                if (error)
                        break;
                ifm = ifp->if_softc;
+               if (ifm->sc_shim.shim_label) {
+                       /* remove old MPLS route */
+                       mpe_newlabel(ifp, RTM_DELETE, &ifm->sc_shim);
+               }
+               /* add new MPLS route */
+               error = mpe_newlabel(ifp, RTM_ADD, &shim);
+               if (error)
+                       break;
                ifm->sc_shim.shim_label = shim.shim_label;
                break;
        default:
@@ -324,7 +390,7 @@ mpe_input(struct mbuf *m, struct ifnet *
 
 #if NBPFILTER > 0
        if (ifp && ifp->if_bpf)
-               bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+               bpf_mtap_af(ifp->if_bpf, AF_INET, m, BPF_DIRECTION_IN);
 #endif
        s = splnet();
        IF_ENQUEUE(&ipintrq, m);
@@ -358,10 +424,45 @@ mpe_input6(struct mbuf *m, struct ifnet 
 
 #if NBPFILTER > 0
        if (ifp && ifp->if_bpf)
-               bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+               bpf_mtap_af(ifp->if_bpf, AF_INET6, m, BPF_DIRECTION_IN);
 #endif
        s = splnet();
        IF_ENQUEUE(&ip6intrq, m);
        schednetisr(NETISR_IPV6);
        splx(s);
+}
+
+int
+mpe_newlabel(struct ifnet *ifp, int cmd, struct shim_hdr *shim)
+{
+       struct rtentry *nrt;
+       struct sockaddr_mpls dst;
+       struct rt_addrinfo info;
+       int error;
+
+       bzero(&dst, sizeof(dst));
+       dst.smpls_len = sizeof(dst);
+       dst.smpls_family = AF_MPLS;
+       dst.smpls_label = shim->shim_label;
+
+       bzero(&info, sizeof(info));
+       info.rti_flags = RTF_UP | RTF_MPLS;
+       info.rti_mpls = MPLS_OP_POP;
+       info.rti_info[RTAX_DST] = smplstosa(&dst);
+       info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)ifp->if_sadl;
+
+       error = rtrequest1(cmd, &info, RTP_CONNECTED, &nrt, 0);
+       rt_missmsg(cmd, &info, error ? 0 : nrt->rt_flags, ifp, error, 0);
+       if (cmd == RTM_DELETE) {
+               if (error == 0 && nrt != NULL) {
+                       if (nrt->rt_refcnt <= 0) {
+                               nrt->rt_refcnt++;
+                               rtfree(nrt);
+                       }
+               }
+       }
+       if (cmd == RTM_ADD && error == 0 && nrt != NULL) {
+               nrt->rt_refcnt--;
+       }
+       return (error);
 }
Index: sys/netmpls/mpls.h
===================================================================
RCS file: /cvs/src/sys/netmpls/mpls.h,v
retrieving revision 1.21
diff -u -p -r1.21 mpls.h
--- sys/netmpls/mpls.h  28 Apr 2009 12:07:43 -0000      1.21
+++ sys/netmpls/mpls.h  25 Mar 2010 11:34:00 -0000
@@ -179,7 +179,8 @@ struct mbuf *mpls_shim_push(struct mbuf 
 
 int             mpls_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 void            mpls_input(struct mbuf *);
-struct mbuf    *mpls_output(struct mbuf *, struct rtentry *);
+int             mpls_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+                   struct rtentry *);
 
 void            mpls_ip_input(struct mbuf *, u_int8_t);
 void            mpls_ip6_input(struct mbuf *, u_int8_t);
Index: sys/netmpls/mpls_input.c
===================================================================
RCS file: /cvs/src/sys/netmpls/mpls_input.c,v
retrieving revision 1.22
diff -u -p -r1.22 mpls_input.c
--- sys/netmpls/mpls_input.c    7 May 2010 13:33:17 -0000       1.22
+++ sys/netmpls/mpls_input.c    12 May 2010 08:41:45 -0000
@@ -95,7 +95,7 @@ mpls_input(struct mbuf *m)
        u_int8_t ttl;
        int i, hasbos;
 
-       if (!mpls_enable) {
+       if (!mpls_enable || !ISSET(ifp->if_xflags, IFXF_MPLS)) {
                m_freem(m);
                return;
        }
@@ -132,11 +132,11 @@ mpls_input(struct mbuf *m)
        }
        ttl--;
 
+       bzero(&sa_mpls, sizeof(sa_mpls));
+       smpls = &sa_mpls;
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_len = sizeof(*smpls);
        for (i = 0; i < mpls_inkloop; i++) {
-               bzero(&sa_mpls, sizeof(sa_mpls));
-               smpls = &sa_mpls;
-               smpls->smpls_family = AF_MPLS;
-               smpls->smpls_len = sizeof(*smpls);
                smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
 
 #ifdef MPLS_DEBUG
@@ -151,15 +151,13 @@ mpls_input(struct mbuf *m)
                        m = mpls_shim_pop(m);
                        shim = mtod(m, struct shim_hdr *);
 
-                       switch (ntohl(smpls->smpls_label)) { 
-
+                       switch (ntohl(smpls->smpls_label)) {
                        case MPLS_LABEL_IPV4NULL:
                                if (hasbos) {
                                        mpls_ip_input(m, ttl);
                                        goto done;
                                } else
                                        continue;
-
                        case MPLS_LABEL_IPV6NULL:
                                if (hasbos) {
                                        mpls_ip6_input(m, ttl);
@@ -184,7 +182,6 @@ mpls_input(struct mbuf *m)
                }
 
                rt->rt_use++;
-               smpls = satosmpls(rt_key(rt));
                rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
 
                if (rt_mpls == NULL || (rt->rt_flags & RTF_MPLS) == 0) {
@@ -196,17 +193,14 @@ mpls_input(struct mbuf *m)
                        goto done;
                }
 
-               if (rt_mpls->mpls_operation == MPLS_OP_LOCAL) {
+               hasbos = MPLS_BOS_ISSET(shim->shim_label);
+               switch (rt_mpls->mpls_operation) {
+               case MPLS_OP_LOCAL:
                        /* Packet is for us */
-                       hasbos = MPLS_BOS_ISSET(shim->shim_label);
-                       if (!hasbos) {
-#ifdef MPLS_DEBUG
-                               printf("MPLS_DEBUG: packet malformed\n");
-#endif
-                               m_freem(m);
-                               goto done;
-                       }
                        m = mpls_shim_pop(m);
+                       if (!hasbos)
+                               /* redo lookup with next label */
+                               break;
 
                        if (!rt->rt_gateway) {
 #ifdef MPLS_DEBUG
@@ -227,16 +221,13 @@ mpls_input(struct mbuf *m)
                        default:
                                m_freem(m);
                        }
-
                        goto done;
-               }
-
-               if (rt_mpls->mpls_operation & MPLS_OP_POP) {
-                       hasbos = MPLS_BOS_ISSET(shim->shim_label);
+               case MPLS_OP_POP:
+                       m = mpls_shim_pop(m);
                        if (hasbos) {
-                               m = mpls_shim_pop(m);
 #if NMPE > 0
                                if (rt->rt_ifp->if_type == IFT_MPLS) {
+                                       smpls = satosmpls(rt_key(rt));
                                        mpe_input(m, rt->rt_ifp, smpls, ttl);
                                        goto done;
                                }
@@ -245,13 +236,23 @@ mpls_input(struct mbuf *m)
                                m_freem(m);
                                goto done;
                        }
+                       break;
+               case MPLS_OP_PUSH:
+                       m = mpls_shim_push(m, rt_mpls);
+                       break;
+               case MPLS_OP_SWAP:
+                       m = mpls_shim_swap(m, rt_mpls);
+                       break;
                }
 
+               if (m == NULL)
+                       goto done;
+
                /* refetch label */
                shim = mtod(m, struct shim_hdr *);
-               ifp = rt->rt_ifp;
 
-               if (ifp != NULL)  
+               ifp = rt->rt_ifp;
+               if (ifp != NULL && rt_mpls->mpls_operation != MPLS_OP_LOCAL)
                        break;
 
                RTFREE(rt);
@@ -273,14 +274,22 @@ mpls_input(struct mbuf *m)
            MPLS_LABEL_GET(rt_mpls->mpls_label));
 #endif
 
-       (*ifp->if_output)(ifp, m, smplstosa(smpls), rt);
+       /* Output iface is not MPLS-enabled */
+       if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
+#ifdef MPLS_DEBUG
+               printf("MPLS_DEBUG: interface not mpls enabled\n");
+#endif
+               goto done;
+       }
+
+       (*ifp->if_ll_output)(ifp, m, smplstosa(smpls), rt);
 done:
        if (rt)
                RTFREE(rt);
 }
 
 void
-mpls_ip_input(struct mbuf *m, u_int8_t ttl) 
+mpls_ip_input(struct mbuf *m, u_int8_t ttl)
 {
        struct ip       *ip;
        int              s, hlen;
Index: sys/netmpls/mpls_output.c
===================================================================
RCS file: /cvs/src/sys/netmpls/mpls_output.c,v
retrieving revision 1.8
diff -u -p -r1.8 mpls_output.c
--- sys/netmpls/mpls_output.c   7 May 2010 13:33:17 -0000       1.8
+++ sys/netmpls/mpls_output.c   12 May 2010 08:42:21 -0000
@@ -27,66 +27,67 @@
 
 #include <netmpls/mpls.h>
 
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
 extern int     mpls_inkloop;
 
 #ifdef MPLS_DEBUG
 #define MPLS_LABEL_GET(l)      ((ntohl((l) & MPLS_LABEL_MASK)) >> 
MPLS_LABEL_OFFSET)
 #endif
 
-struct mbuf *
-mpls_output(struct mbuf *m, struct rtentry *rt0)
+void   mpls_do_cksum(struct mbuf *);
+
+int
+mpls_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt0)
 {
-       struct ifnet            *ifp = m->m_pkthdr.rcvif;
+       struct ifnet            *ifp = ifp0;
        struct sockaddr_mpls    *smpls;
        struct sockaddr_mpls     sa_mpls;
        struct shim_hdr         *shim;
        struct rtentry          *rt = rt0;
        struct rt_mpls          *rt_mpls;
-       int                      i;
+       int                      i, error;
 
-       if (!mpls_enable) {
-               m_freem(m);
-               goto bad;
+       if (!mpls_enable || rt0 == NULL || (dst->sa_family != AF_INET &&
+           dst->sa_family != AF_INET6 && dst->sa_family != AF_MPLS)) {
+               if (!ISSET(ifp->if_xflags, IFXF_MPLS))
+                       return (ifp->if_output(ifp, m, dst, rt));
+               else
+                       return (ifp->if_ll_output(ifp, m, dst, rt));
        }
 
-       /* reset broadcast and multicast flags, this is a P2P tunnel */
-       m->m_flags &= ~(M_BCAST | M_MCAST);
+       /* need to calculate checksums now if necessary */
+       if (m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT | M_TCPV4_CSUM_OUT |
+           M_UDPV4_CSUM_OUT))
+               mpls_do_cksum(m);
+
+       /* initialize sockaddr_mpls */
+       bzero(&sa_mpls, sizeof(sa_mpls));
+       smpls = &sa_mpls;
+       smpls->smpls_family = AF_MPLS;
+       smpls->smpls_len = sizeof(*smpls);
 
        for (i = 0; i < mpls_inkloop; i++) {
-               if (rt == NULL) {
-                       shim = mtod(m, struct shim_hdr *);
-
-                       bzero(&sa_mpls, sizeof(sa_mpls));
-                       smpls = &sa_mpls;
-                       smpls->smpls_family = AF_MPLS;
-                       smpls->smpls_len = sizeof(*smpls);
-                       smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
-
-                       rt = rtalloc1(smplstosa(smpls), RT_REPORT, 0);
-                       if (rt == NULL) {
-                               /* no entry for this label */
-#ifdef MPLS_DEBUG
-                               printf("MPLS_DEBUG: label not found\n");
-#endif
-                               m_freem(m);
-                               goto bad;
-                       }
-                       rt->rt_use++;
-               }
-
                rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
                if (rt_mpls == NULL || (rt->rt_flags & RTF_MPLS) == 0) {
                        /* no MPLS information for this entry */
+                       if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
 #ifdef MPLS_DEBUG
-                       printf("MPLS_DEBUG: no MPLS information attached\n");
+                               printf("MPLS_DEBUG: interface not mpls 
enabled\n");
 #endif
-                       m_freem(m);
-                       goto bad;
-               }
+                               error = ENETUNREACH;
+                               goto bad;
+                       }
 
-               switch (rt_mpls->mpls_operation & (MPLS_OP_PUSH | MPLS_OP_POP |
-                   MPLS_OP_SWAP)) {
+                       return (ifp->if_ll_output(ifp0, m, dst, rt0));
+               }
 
+               switch (rt_mpls->mpls_operation) {
                case MPLS_OP_PUSH:
                        m = mpls_shim_push(m, rt_mpls);
                        break;
@@ -97,29 +98,45 @@ mpls_output(struct mbuf *m, struct rtent
                        m = mpls_shim_swap(m, rt_mpls);
                        break;
                default:
-                       m_freem(m);
+                       error = EINVAL;
                        goto bad;
                }
 
-               if (m == NULL)
+               if (m == NULL) {
+                       error = ENOBUFS;
                        goto bad;
+               }
 
                /* refetch label */
                shim = mtod(m, struct shim_hdr *);
-               ifp = rt->rt_ifp;
+               /* mark first label with BOS flag */
+               if (rt0 == rt && dst->sa_family != AF_MPLS)
+                       shim->shim_label |= MPLS_BOS_MASK;
 
+               ifp = rt->rt_ifp;
                if (ifp != NULL)
                        break;
 
-               if (rt0 != rt)
-                       RTFREE(rt);
+               shim = mtod(m, struct shim_hdr *);
+               smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
 
-               rt = NULL;
+               rt = rtalloc1(smplstosa(smpls), RT_REPORT, 0);
+               if (rt == NULL) {
+                       /* no entry for this label */
+#ifdef MPLS_DEBUG
+                       printf("MPLS_DEBUG: label %d not found\n",
+                           MPLS_LABEL_GET(shim->shim_label));
+#endif
+                       error = EHOSTUNREACH;
+                       goto bad;
+               }
+               rt->rt_use++;
+               rt->rt_refcnt--;
        }
 
        /* write back TTL */
        shim->shim_label &= ~MPLS_TTL_MASK;
-       shim->shim_label |= MPLS_BOS_MASK | htonl(mpls_defttl);
+       shim->shim_label |= htonl(mpls_defttl);
 
 #ifdef MPLS_DEBUG
        printf("MPLS: sending on %s outshim %x outlabel %d\n",
@@ -127,13 +144,42 @@ mpls_output(struct mbuf *m, struct rtent
            MPLS_LABEL_GET(rt_mpls->mpls_label));
 #endif
 
-       if (rt != rt0)
-               RTFREE(rt);
+       /* Output iface is not MPLS-enabled */
+       if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
+#ifdef MPLS_DEBUG
+               printf("MPLS_DEBUG: interface not mpls enabled\n");
+#endif
+               error = ENETUNREACH;
+               goto bad;
+       }
+
+       /* reset broadcast and multicast flags, this is a P2P tunnel */
+       m->m_flags &= ~(M_BCAST | M_MCAST);
 
-       return (m);
+       smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
+       return (ifp->if_ll_output(ifp, m, smplstosa(smpls), rt));
 bad:
-       if (rt != rt0)
-               RTFREE(rt);
+       if (m)
+               m_freem(m);
+       return (error);
+}
 
-       return (NULL);
+void
+mpls_do_cksum(struct mbuf *m)
+{
+#ifdef INET
+       struct ip *ip;
+       u_int16_t hlen;
+
+       if (m->m_pkthdr.csum_flags & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
+               in_delayed_cksum(m);
+               m->m_pkthdr.csum_flags &= ~(M_UDPV4_CSUM_OUT|M_TCPV4_CSUM_OUT);
+       }
+       if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) {
+               ip = mtod(m, struct ip *);
+               hlen = ip->ip_hl << 2;
+               ip->ip_sum = in_cksum(m, hlen);
+               m->m_pkthdr.csum_flags &= ~M_IPV4_CSUM_OUT;
+       }
+#endif
 }

Reply via email to