On 04/11/16(Fri) 10:45, Claudio Jeker wrote:
> On Wed, Nov 02, 2016 at 05:44:14PM +0100, Martin Pieuchot wrote:
> > [..] 
> > Diff below should fix that by automagically creating a loopback
> > interface when a new routing domain is created.  That mean loX will
> > now correspond to routing domain X.
> 
> I like this a lot. I think having a loopback interface per rdomain is kind
> of required but it was something I never finished.
> If we could assign 127.0.0.1 and ::1 to those interfaces automatically
> that would be even better (but that can be done as a second step).

Adding ::1 is trivial, it's a one line change.

To add 127.0.0.1 properly it's another story as currently netstart(8)
sets it.

> > Not that if you are currently using a lo2 and a rdomain 2 this diff
> > will break your rdomain setup.
> 
> Should we split loopbacks into lo(4) and e.g. loop(4) one is for loopback
> interfaces from userland and the other is only there for the 127.0.0.1 and
> ::1 loopbacks? At least that we the conflict would be solved since we then
> have different namespaces. Again this is something we can do on top of
> this.

I'm not sure to understand the benefit.  What's the use case for loop(4)? 

> > I'll write the manual page for rtable_loindex(9) if you're ok with
> > the approach.
> 
> I think this approach is fine. The only thing I dislike is how you cram 
> ifindex and rdomain id into the rtable 2 rdomain mapping. It feels dirty
> to me to do it that way instead of using a little struct holding the two
> values as independent data.

It is dirty but using a small struct is not possible without revamping the
mp-safeness of rtable_get(9) which is, IMHO, not worth the effort. 

Here's an updated diff with man page, I'm asking for oks now that
semarie confirmed the m_pullup(9) regression isn't part of my diff. 

Index: sys/kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.262
diff -u -p -r1.262 init_main.c
--- sys/kern/init_main.c        7 Nov 2016 00:26:32 -0000       1.262
+++ sys/kern/init_main.c        8 Nov 2016 14:22:12 -0000
@@ -389,6 +389,9 @@ main(void *framep)
        msginit();
 #endif
 
+       /* Create default routing table before attaching lo0. */
+       rtable_init();
+
        /* Attach pseudo-devices. */
        for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
                if (pdev->pdev_count > 0)
@@ -398,8 +401,6 @@ main(void *framep)
        crypto_init();
        swcr_init();
 #endif /* CRYPTO */
-
-       rtable_init();
 
        /*
         * Initialize protocols.  Block reception of incoming packets
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.458
diff -u -p -r1.458 if.c
--- sys/net/if.c        8 Nov 2016 10:47:10 -0000       1.458
+++ sys/net/if.c        8 Nov 2016 14:22:12 -0000
@@ -258,7 +258,6 @@ struct srp_gc if_ifp_gc = SRP_GC_INITIAL
 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
 
 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
-unsigned int lo0ifidx;
 
 void
 if_idxmap_init(unsigned int limit)
@@ -1350,12 +1349,7 @@ p2p_rtrequest(struct ifnet *ifp, int req
 
                KASSERT(ifa == rt->rt_ifa);
 
-               /*
-                * XXX Since lo0 is in the default rdomain we should not
-                * (ab)use it for any route related to an interface of a
-                * different rdomain.
-                */
-               lo0ifp = if_get(lo0ifidx);
+               lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
                KASSERT(lo0ifp != NULL);
                TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
                        if (lo0ifa->ifa_addr->sa_family ==
@@ -1438,7 +1432,7 @@ if_up(struct ifnet *ifp)
 
 #ifdef INET6
        /* Userland expects the kernel to set ::1 on lo0. */
-       if (ifp->if_index == lo0ifidx)
+       if (ifp->if_index == rtable_loindex(0))
                in6_ifattach(ifp);
 #endif
 
@@ -1605,14 +1599,31 @@ if_setrdomain(struct ifnet *ifp, int rdo
        if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
                return (EINVAL);
 
-       /* make sure that the routing table exists */
+       /*
+        * Create the routing table if it does not exist, including its
+        * loopback interface with unit == rdomain.
+        */
        if (!rtable_exists(rdomain)) {
+               struct ifnet *loifp;
+               char loifname[IFNAMSIZ];
+
+               snprintf(loifname, sizeof(loifname), "lo%d", rdomain);
+               if ((error = if_clone_create(loifname, 0)))
+                       return (error);
+
+               if ((loifp = ifunit(loifname)) == NULL)
+                       return (ENXIO);
+
                s = splsoftnet();
                if ((error = rtable_add(rdomain)) == 0)
-                       rtable_l2set(rdomain, rdomain);
+                       rtable_l2set(rdomain, rdomain, loifp->if_index);
                splx(s);
-               if (error)
+               if (error) {
+                       if_clone_destroy(loifname);
                        return (error);
+               }
+
+               loifp->if_rdomain = rdomain;
        }
 
        /* make sure that the routing table is a real rdomain */
Index: sys/net/if_loop.c
===================================================================
RCS file: /cvs/src/sys/net/if_loop.c,v
retrieving revision 1.76
diff -u -p -r1.76 if_loop.c
--- sys/net/if_loop.c   13 Apr 2016 11:41:15 -0000      1.76
+++ sys/net/if_loop.c   8 Nov 2016 14:22:12 -0000
@@ -121,6 +121,7 @@
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
+#include <net/rtable.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
@@ -182,7 +183,7 @@ loop_clone_create(struct if_clone *ifc, 
        if (unit == 0) {
                if_attachhead(ifp);
                if_addgroup(ifp, ifc->ifc_name);
-               lo0ifidx = ifp->if_index;
+               rtable_l2set(0, 0, ifp->if_index);
        } else
                if_attach(ifp);
        if_alloc_sadl(ifp);
@@ -195,7 +196,7 @@ loop_clone_create(struct if_clone *ifc, 
 int
 loop_clone_destroy(struct ifnet *ifp)
 {
-       if (ifp->if_index == lo0ifidx)
+       if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
                return (EPERM);
 
        if_detach(ifp);
Index: sys/net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.76
diff -u -p -r1.76 if_var.h
--- sys/net/if_var.h    8 Nov 2016 10:46:05 -0000       1.76
+++ sys/net/if_var.h    8 Nov 2016 14:22:12 -0000
@@ -291,7 +291,6 @@ int         niq_enlist(struct niqueue *, struct
     sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
 
 extern struct ifnet_head ifnet;
-extern unsigned int lo0ifidx;
 extern struct taskq *softnettq;
 
 void   if_start(struct ifnet *);
Index: sys/net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.334
diff -u -p -r1.334 route.c
--- sys/net/route.c     8 Nov 2016 10:39:32 -0000       1.334
+++ sys/net/route.c     8 Nov 2016 14:22:12 -0000
@@ -197,8 +197,6 @@ route_init(void)
        while (rt_hashjitter == 0)
                rt_hashjitter = arc4random();
 
-       if (rtable_add(0) != 0)
-               panic("route_init rtable_add");
 #ifdef BFD
        bfdinit();
 #endif
Index: sys/net/rtable.c
===================================================================
RCS file: /cvs/src/sys/net/rtable.c,v
retrieving revision 1.52
diff -u -p -r1.52 rtable.c
--- sys/net/rtable.c    7 Sep 2016 09:36:49 -0000       1.52
+++ sys/net/rtable.c    8 Nov 2016 14:22:12 -0000
@@ -1,7 +1,7 @@
 /*     $OpenBSD: rtable.c,v 1.52 2016/09/07 09:36:49 mpi Exp $ */
 
 /*
- * Copyright (c) 2014-2015 Martin Pieuchot
+ * Copyright (c) 2014-2016 Martin Pieuchot
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -41,7 +41,7 @@
  *     afmap               rtmap/dommp
  *   -----------          ---------     -----
  *   |   0     |--------> | 0 | 0 | ... | 0 |  Array mapping rtableid (=index)
- *   -----------          ---------     -----   to rdomain (=value).
+ *   -----------          ---------     -----   to rdomain/loopback (=value).
  *   | AF_INET |.
  *   ----------- `.       .---------.     .---------.
  *       ...      `----> | rtable0 | ... | rtableN |   Array of pointers for
@@ -59,10 +59,20 @@ struct rtmap {
        void             **tbl;
 };
 
-/* Array of rtableid -> rdomain mapping. */
+/*
+ * Array of rtableid -> rdomain mapping.
+ *
+ * Only used for the first index as describbed above.
+ */
 struct dommp {
        unsigned int       limit;
-       unsigned int      *dom;
+       /*
+        * Array to get the routing domain and loopback interface related to
+        * a routing table. Format:
+        *
+        * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
+        */
+       unsigned int      *value;
 };
 
 unsigned int      rtmap_limit = 0;
@@ -146,6 +156,8 @@ rtable_init(void)
        unsigned int     keylen = 0;
        int              i;
 
+       KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));
+
        /* We use index 0 for the rtable/rdomain map. */
        af2idx_max = 1;
        memset(af2idx, 0, sizeof(af2idx));
@@ -173,6 +185,9 @@ rtable_init(void)
            M_WAITOK|M_ZERO);
 
        rtmap_init();
+
+       if (rtable_add(0) != 0)
+               panic("unable to create default routing table");
 }
 
 int
@@ -221,7 +236,7 @@ rtable_add(unsigned int id)
 
        /* Use main rtable/rdomain by default. */
        dmm = srp_get_locked(&afmap[0]);
-       dmm->dom[id] = 0;
+       dmm->value[id] = 0;
 
        return (0);
 }
@@ -272,24 +287,42 @@ rtable_l2(unsigned int rtableid)
 
        dmm = srp_enter(&sr, &afmap[0]);
        if (rtableid < dmm->limit)
-               rdomain = dmm->dom[rtableid];
+               rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
        srp_leave(&sr);
 
        return (rdomain);
 }
 
+unsigned int
+rtable_loindex(unsigned int rtableid)
+{
+       struct dommp    *dmm;
+       unsigned int     loifidx = 0;
+       struct srp_ref   sr;
+
+       dmm = srp_enter(&sr, &afmap[0]);
+       if (rtableid < dmm->limit)
+               loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
+       srp_leave(&sr);
+
+       return (loifidx);
+}
+
 void
-rtable_l2set(unsigned int rtableid, unsigned int rdomain)
+rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
 {
        struct dommp    *dmm;
+       unsigned int     value;
 
        KERNEL_ASSERT_LOCKED();
 
        if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
                return;
 
+       value = (rdomain & RT_TABLEID_MASK) + (loifidx << RT_TABLEID_BITS);
+
        dmm = srp_get_locked(&afmap[0]);
-       dmm->dom[rtableid] = rdomain;
+       dmm->value[rtableid] = value;
 }
 
 #ifndef ART
Index: sys/net/rtable.h
===================================================================
RCS file: /cvs/src/sys/net/rtable.h,v
retrieving revision 1.16
diff -u -p -r1.16 rtable.h
--- sys/net/rtable.h    7 Sep 2016 09:36:49 -0000       1.16
+++ sys/net/rtable.h    8 Nov 2016 14:22:12 -0000
@@ -54,7 +54,8 @@ void           rtable_init(void);
 int             rtable_exists(unsigned int);
 int             rtable_add(unsigned int);
 unsigned int    rtable_l2(unsigned int);
-void            rtable_l2set(unsigned int, unsigned int);
+unsigned int    rtable_loindex(unsigned int);
+void            rtable_l2set(unsigned int, unsigned int, unsigned int);
 
 struct rtentry *rtable_lookup(unsigned int, struct sockaddr *,
                     struct sockaddr *, struct sockaddr *, uint8_t);
Index: sys/netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.327
diff -u -p -r1.327 ip_output.c
--- sys/netinet/ip_output.c     4 Sep 2016 17:18:56 -0000       1.327
+++ sys/netinet/ip_output.c     8 Nov 2016 14:22:12 -0000
@@ -211,7 +211,7 @@ reroute:
 
                ia = ifatoia(ro->ro_rt->rt_ifa);
                if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL))
-                       ifp = if_get(lo0ifidx);
+                       ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
                else
                        ifp = if_get(ro->ro_rt->rt_ifidx);
                if (ifp == NULL) {
Index: sys/netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.168
diff -u -p -r1.168 ip6_input.c
--- sys/netinet6/ip6_input.c    24 Aug 2016 09:41:12 -0000      1.168
+++ sys/netinet6/ip6_input.c    8 Nov 2016 14:22:12 -0000
@@ -211,7 +211,10 @@ ip6_input(struct mbuf *m)
        } else {
                if (m->m_next) {
                        if (m->m_flags & M_LOOP) {
-                               ip6stat.ip6s_m2m[lo0ifidx]++;   /*XXX*/
+                               int ifidx;
+
+                               ifidx = rtable_loindex(m->m_pkthdr.ph_rtableid);
+                               ip6stat.ip6s_m2m[ifidx]++;
                        } else if (ifp->if_index < nitems(ip6stat.ip6s_m2m))
                                ip6stat.ip6s_m2m[ifp->if_index]++;
                        else
Index: sys/netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.216
diff -u -p -r1.216 ip6_output.c
--- sys/netinet6/ip6_output.c   19 Sep 2016 18:09:09 -0000      1.216
+++ sys/netinet6/ip6_output.c   8 Nov 2016 14:22:12 -0000
@@ -460,7 +460,7 @@ reroute:
                        goto bad;
                }
                if (ISSET(rt->rt_flags, RTF_LOCAL))
-                       ifp = if_get(lo0ifidx);
+                       ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
                else
                        ifp = if_get(rt->rt_ifidx);
        } else {
Index: sys/sys/socket.h
===================================================================
RCS file: /cvs/src/sys/sys/socket.h,v
retrieving revision 1.92
diff -u -p -r1.92 socket.h
--- sys/sys/socket.h    28 Sep 2016 18:50:20 -0000      1.92
+++ sys/sys/socket.h    8 Nov 2016 14:22:12 -0000
@@ -143,7 +143,9 @@ struct      splice {
 /*
  * Maximum number of alternate routing tables
  */
-#define        RT_TABLEID_MAX  255
+#define        RT_TABLEID_MAX          255
+#define        RT_TABLEID_BITS         8
+#define        RT_TABLEID_MASK         0xff
 
 #endif /* __BSD_VISIBLE */
 
Index: share/man/man9/rtable_add.9
===================================================================
RCS file: /cvs/src/share/man/man9/rtable_add.9,v
retrieving revision 1.7
diff -u -p -r1.7 rtable_add.9
--- share/man/man9/rtable_add.9 11 Dec 2015 16:08:30 -0000      1.7
+++ share/man/man9/rtable_add.9 8 Nov 2016 14:28:25 -0000
@@ -21,6 +21,7 @@
 .Sh NAME
 .Nm rtable_add ,
 .Nm rtable_exists ,
+.Nm rtable_loindex ,
 .Nm rtable_l2 ,
 .Nm rtable_l2set
 .Nd routing tables and routing domains interface
@@ -31,6 +32,8 @@
 .Ft int
 .Fn rtable_exists "unsigned int id"
 .Ft unsigned int
+.Fn rtable_loindex "unsigned int id"
+.Ft unsigned int
 .Fn rtable_l2 "unsigned int id"
 .Ft void
 .Fn rtable_l2set "unsigned int id" "unsigned int rdomain"
@@ -53,6 +56,11 @@ if routing table with ID
 exists,
 .Fa 0
 otherwise.
+.It Fn rtable_loindex "unsigned int id"
+Return the default
+.Xr lo 4
+interface index for the routing table with ID of
+.Fa id .
 .It Fn rtable_l2 "unsigned int id"
 Get the routing domain of routing table with ID of
 .Fa id .
@@ -65,6 +73,7 @@ under the routing domain with ID of
 .Sh CONTEXT
 .Fn rtable_add ,
 .Fn rtable_exists ,
+.Fn rtable_loindex ,
 .Fn rtable_l2 ,
 and
 .Fn task_l2set
@@ -82,5 +91,6 @@ already exists.
 Memory could not be allocated to extend the list of routing domains.
 .El
 .Sh SEE ALSO
+.Xr lo 4 ,
 .Xr route 4 ,
 .Xr route 8

Reply via email to