On Wed, Nov 02, 2016 at 05:44:14PM +0100, Martin Pieuchot wrote:
> On 28/10/16(Fri) 16:27, Claudio Jeker wrote:
> > On Fri, Oct 28, 2016 at 04:19:35PM +0200, Nils Frohberg wrote:
> > > I currently cannot access the local IP of an interface on rdomain 1:
> > >
> > > Script started on Fri Oct 28 15:02:20 2016
> > > $ doas pfctl -d
> > > pfctl: pf not enabled
> > > $ doas ifconfig vether0
> > > vether0: no such interface
> > > $ doas ifconfig vether0 rdomain 1
> > > $ doas ifconfig vether0 inet 192.168.42.2
> > > $ doas ifconfig vether0
> > > vether0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> rdomain 1 mtu
> > > 1500
> > > lladdr fe:e1:ba:d1:73:55
> > > index 7 priority 0 llprio 3
> > > groups: vether
> > > media: Ethernet autoselect
> > > status: active
> > > inet 192.168.42.2 netmask 0xffffff00 broadcast 192.168.42.255
> > > $ route -T1 -n show -inet
> > > Routing tables
> > >
> > > Internet:
> > > Destination Gateway Flags Refs Use Mtu Prio
> > > Iface
> > > 192.168.42/24 192.168.42.2 UCn 0 0 - 4
> > > vether0
> > > 192.168.42.2 fe:e1:ba:d1:73:55 UHLl 0 0 - 1
> > > vether0
> > > 192.168.42.255 192.168.42.2 UHb 0 0 - 1
> > > vether0
> > > $ ping -V1 192.168.42.2
> > > PING 192.168.42.2 (192.168.42.2): 56 data bytes
> > > ^C
> > > --- 192.168.42.2 ping statistics ---
> > > 4 packets transmitted, 0 packets received, 100.0% packet loss
> > > $ ^D
> > >
> > > Script done on Fri Oct 28 15:04:16 2016
> > >
> > > Connecting to other boxes via routes on rdomain 1 works as usual
> > > (not shown in the script output above).
> > >
> > > I tracked the problem to being caused by v1.455 of if.c. It seems
> > > that ifp->if_rdomain == 0 when running the ping above. In this case,
> > > m->m_pkthdr.ph_rtableid should be set back to 1 after the reset
> > > though (as it was before m_resethdr(m)). Shouldn't ifp->if_rdomain
> > > be equal to 1 in this case? If yes, there must be a bug earlier in
> > > the code.
> > >
> > > The following diff fixes things for me, but I don't know if
> > > ifp->if_rdomain isn't the one that needs to be fixed:
> >
> > Can you check the ifp->if_index. I bet it is pointing to the loopback
> > interface lo0 and so you end up with the wrong rdomain id.
> > This is a nasty consequence of using lo0 accross domains.
>
> Diff below should fix that by automagically creating a loopback
> interface when a new routing domain is created. That mean loX will
> now correspond to routing domain X.
Thanks (to all) for looking into this. Your patch works for me:
Script started on Thu Nov 3 08:30:47 2016
$ doas pfctl -d
pfctl: pf not enabled
$ ifconfig vether0
vether0: no such interface
$ ifconfig lo1
lo1: no such interface
$ doas ifconfig vether0 rdomain 1
$ ifconfig lo1
lo1: flags=8008<LOOPBACK,MULTICAST> rdomain 1 mtu 32768
index 7 priority 0 llprio 3
groups: lo
$ doas ifconfig vether0 inet 192.168.42.2
$ ifconfig vether0
vether0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> rdomain 1 mtu 1500
lladdr fe:e1:ba:d0:20:89
index 6 priority 0 llprio 3
groups: vether
media: Ethernet autoselect
status: active
inet 192.168.42.2 netmask 0xffffff00 broadcast 192.168.42.255
$ route -T1 -n show -inet
Routing tables
Internet:
Destination Gateway Flags Refs Use Mtu Prio Iface
192.168.42/24 192.168.42.2 UCn 0 0 - 4 vether0
192.168.42.2 fe:e1:ba:d0:20:89 UHLl 0 0 - 1 vether0
192.168.42.255 192.168.42.2 UHb 0 0 - 1 vether0
$ ping -V1 192.168.42.2
PING 192.168.42.2 (192.168.42.2): 56 data bytes
64 bytes from 192.168.42.2: icmp_seq=0 ttl=255 time=0.095 ms
64 bytes from 192.168.42.2: icmp_seq=1 ttl=255 time=0.087 ms
64 bytes from 192.168.42.2: icmp_seq=2 ttl=255 time=0.026 ms
64 bytes from 192.168.42.2: icmp_seq=3 ttl=255 time=0.069 ms
^C
--- 192.168.42.2 ping statistics ---
4 packets transmitted, 4 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 0.026/0.069/0.095/0.027 ms
$ ^D
Script done on Thu Nov 3 08:34:03 2016
TCP connections also work (tested with a little nc -l/nc dance).
>
> Not that if you are currently using a lo2 and a rdomain 2 this diff
> will break your rdomain setup.
>
> I'll write the manual page for rtable_loindex(9) if you're ok with
> the approach.
>
> Index: kern/init_main.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_main.c,v
> retrieving revision 1.261
> diff -u -p -r1.261 init_main.c
> --- kern/init_main.c 24 Oct 2016 04:38:44 -0000 1.261
> +++ kern/init_main.c 2 Nov 2016 16:05:43 -0000
> @@ -388,6 +388,9 @@ main(void *framep)
> msginit();
> #endif
>
> + /* Create default routing table before attaching lo0. */
> + rtable_init();
> +
> /* Attach pseudo-devices. */
> for (pdev = pdevinit; pdev->pdev_attach != NULL; pdev++)
> if (pdev->pdev_count > 0)
> @@ -397,8 +400,6 @@ main(void *framep)
> crypto_init();
> swcr_init();
> #endif /* CRYPTO */
> -
> - rtable_init();
>
> /*
> * Initialize protocols. Block reception of incoming packets
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.456
> diff -u -p -r1.456 if.c
> --- net/if.c 19 Oct 2016 02:05:49 -0000 1.456
> +++ net/if.c 2 Nov 2016 16:29:39 -0000
> @@ -259,7 +259,6 @@ struct srp_gc if_ifp_gc = SRP_GC_INITIAL
> struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
>
> struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
> -unsigned int lo0ifidx;
>
> void
> if_idxmap_init(unsigned int limit)
> @@ -1392,12 +1391,7 @@ p2p_rtrequest(struct ifnet *ifp, int req
>
> KASSERT(ifa == rt->rt_ifa);
>
> - /*
> - * XXX Since lo0 is in the default rdomain we should not
> - * (ab)use it for any route related to an interface of a
> - * different rdomain.
> - */
> - lo0ifp = if_get(lo0ifidx);
> + lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
> KASSERT(lo0ifp != NULL);
> TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
> if (lo0ifa->ifa_addr->sa_family ==
> @@ -1480,7 +1474,7 @@ if_up(struct ifnet *ifp)
>
> #ifdef INET6
> /* Userland expects the kernel to set ::1 on lo0. */
> - if (ifp->if_index == lo0ifidx)
> + if (ifp->if_index == rtable_loindex(0))
> in6_ifattach(ifp);
> #endif
>
> @@ -1647,14 +1641,31 @@ if_setrdomain(struct ifnet *ifp, int rdo
> if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
> return (EINVAL);
>
> - /* make sure that the routing table exists */
> + /*
> + * Create the routing table if it does not exist, including its
> + * loopback interface with unit == rdomain.
> + */
> if (!rtable_exists(rdomain)) {
> + struct ifnet *loifp;
> + char loifname[IFNAMSIZ];
> +
> + snprintf(loifname, sizeof(loifname), "lo%d", rdomain);
> + if ((error = if_clone_create(loifname, 0)))
> + return (error);
> +
> + if ((loifp = ifunit(loifname)) == NULL)
> + return (ENXIO);
> +
> s = splsoftnet();
> if ((error = rtable_add(rdomain)) == 0)
> - rtable_l2set(rdomain, rdomain);
> + rtable_l2set(rdomain, rdomain, loifp->if_index);
> splx(s);
> - if (error)
> + if (error) {
> + if_clone_destroy(loifname);
> return (error);
> + }
> +
> + loifp->if_rdomain = rdomain;
> }
>
> /* make sure that the routing table is a real rdomain */
> Index: net/if_loop.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_loop.c,v
> retrieving revision 1.76
> diff -u -p -r1.76 if_loop.c
> --- net/if_loop.c 13 Apr 2016 11:41:15 -0000 1.76
> +++ net/if_loop.c 2 Nov 2016 16:26:03 -0000
> @@ -121,6 +121,7 @@
> #include <net/if_var.h>
> #include <net/if_types.h>
> #include <net/netisr.h>
> +#include <net/rtable.h>
> #include <net/route.h>
>
> #include <netinet/in.h>
> @@ -182,7 +183,7 @@ loop_clone_create(struct if_clone *ifc,
> if (unit == 0) {
> if_attachhead(ifp);
> if_addgroup(ifp, ifc->ifc_name);
> - lo0ifidx = ifp->if_index;
> + rtable_l2set(0, 0, ifp->if_index);
> } else
> if_attach(ifp);
> if_alloc_sadl(ifp);
> @@ -195,7 +196,7 @@ loop_clone_create(struct if_clone *ifc,
> int
> loop_clone_destroy(struct ifnet *ifp)
> {
> - if (ifp->if_index == lo0ifidx)
> + if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
> return (EPERM);
>
> if_detach(ifp);
> Index: net/if_var.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_var.h,v
> retrieving revision 1.75
> diff -u -p -r1.75 if_var.h
> --- net/if_var.h 4 Sep 2016 15:46:39 -0000 1.75
> +++ net/if_var.h 2 Nov 2016 12:11:19 -0000
> @@ -291,7 +291,6 @@ int niq_enlist(struct niqueue *, struct
> sysctl_mq((_n), (_l), (_op), (_olp), (_np), (_nl), &(_niq)->ni_q)
>
> extern struct ifnet_head ifnet;
> -extern unsigned int lo0ifidx;
> extern struct taskq *softnettq;
>
> void if_start(struct ifnet *);
> Index: net/route.c
> ===================================================================
> RCS file: /cvs/src/sys/net/route.c,v
> retrieving revision 1.333
> diff -u -p -r1.333 route.c
> --- net/route.c 6 Oct 2016 19:09:08 -0000 1.333
> +++ net/route.c 2 Nov 2016 16:31:00 -0000
> @@ -197,8 +197,6 @@ route_init(void)
> while (rt_hashjitter == 0)
> rt_hashjitter = arc4random();
>
> - if (rtable_add(0) != 0)
> - panic("route_init rtable_add");
> #ifdef BFD
> bfdinit();
> #endif
> Index: net/rtable.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.c,v
> retrieving revision 1.52
> diff -u -p -r1.52 rtable.c
> --- net/rtable.c 7 Sep 2016 09:36:49 -0000 1.52
> +++ net/rtable.c 2 Nov 2016 16:37:06 -0000
> @@ -1,7 +1,7 @@
> /* $OpenBSD: rtable.c,v 1.52 2016/09/07 09:36:49 mpi Exp $ */
>
> /*
> - * Copyright (c) 2014-2015 Martin Pieuchot
> + * Copyright (c) 2014-2016 Martin Pieuchot
> *
> * Permission to use, copy, modify, and distribute this software for any
> * purpose with or without fee is hereby granted, provided that the above
> @@ -41,7 +41,7 @@
> * afmap rtmap/dommp
> * ----------- --------- -----
> * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid
> (=index)
> - * ----------- --------- ----- to rdomain (=value).
> + * ----------- --------- ----- to rdomain/loopback (=value).
> * | AF_INET |.
> * ----------- `. .---------. .---------.
> * ... `----> | rtable0 | ... | rtableN | Array of pointers for
> @@ -59,10 +59,20 @@ struct rtmap {
> void **tbl;
> };
>
> -/* Array of rtableid -> rdomain mapping. */
> +/*
> + * Array of rtableid -> rdomain mapping.
> + *
> + * Only used for the first index as describbed above.
> + */
> struct dommp {
> unsigned int limit;
> - unsigned int *dom;
> + /*
> + * Array to get the routing domain and loopback interface related to
> + * a routing table. Format:
> + *
> + * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
> + */
> + unsigned int *value;
> };
>
> unsigned int rtmap_limit = 0;
> @@ -146,6 +156,8 @@ rtable_init(void)
> unsigned int keylen = 0;
> int i;
>
> + KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));
> +
> /* We use index 0 for the rtable/rdomain map. */
> af2idx_max = 1;
> memset(af2idx, 0, sizeof(af2idx));
> @@ -173,6 +185,9 @@ rtable_init(void)
> M_WAITOK|M_ZERO);
>
> rtmap_init();
> +
> + if (rtable_add(0) != 0)
> + panic("unable to create default routing table");
> }
>
> int
> @@ -221,7 +236,7 @@ rtable_add(unsigned int id)
>
> /* Use main rtable/rdomain by default. */
> dmm = srp_get_locked(&afmap[0]);
> - dmm->dom[id] = 0;
> + dmm->value[id] = 0;
>
> return (0);
> }
> @@ -272,24 +287,42 @@ rtable_l2(unsigned int rtableid)
>
> dmm = srp_enter(&sr, &afmap[0]);
> if (rtableid < dmm->limit)
> - rdomain = dmm->dom[rtableid];
> + rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
> srp_leave(&sr);
>
> return (rdomain);
> }
>
> +unsigned int
> +rtable_loindex(unsigned int rtableid)
> +{
> + struct dommp *dmm;
> + unsigned int loifidx = 0;
> + struct srp_ref sr;
> +
> + dmm = srp_enter(&sr, &afmap[0]);
> + if (rtableid < dmm->limit)
> + loifidx = dmm->value[rtableid] >> RT_TABLEID_BITS;
> + srp_leave(&sr);
> +
> + return (loifidx);
> +}
> +
> void
> -rtable_l2set(unsigned int rtableid, unsigned int rdomain)
> +rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int
> loifidx)
> {
> struct dommp *dmm;
> + unsigned int value;
>
> KERNEL_ASSERT_LOCKED();
>
> if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
> return;
>
> + value = (rdomain & RT_TABLEID_MASK) + (loifidx << RT_TABLEID_BITS);
> +
> dmm = srp_get_locked(&afmap[0]);
> - dmm->dom[rtableid] = rdomain;
> + dmm->value[rtableid] = value;
> }
>
> #ifndef ART
> Index: net/rtable.h
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.h,v
> retrieving revision 1.16
> diff -u -p -r1.16 rtable.h
> --- net/rtable.h 7 Sep 2016 09:36:49 -0000 1.16
> +++ net/rtable.h 2 Nov 2016 12:01:11 -0000
> @@ -54,7 +54,8 @@ void rtable_init(void);
> int rtable_exists(unsigned int);
> int rtable_add(unsigned int);
> unsigned int rtable_l2(unsigned int);
> -void rtable_l2set(unsigned int, unsigned int);
> +unsigned int rtable_loindex(unsigned int);
> +void rtable_l2set(unsigned int, unsigned int, unsigned int);
>
> struct rtentry *rtable_lookup(unsigned int, struct sockaddr *,
> struct sockaddr *, struct sockaddr *, uint8_t);
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.327
> diff -u -p -r1.327 ip_output.c
> --- netinet/ip_output.c 4 Sep 2016 17:18:56 -0000 1.327
> +++ netinet/ip_output.c 2 Nov 2016 16:27:14 -0000
> @@ -211,7 +211,7 @@ reroute:
>
> ia = ifatoia(ro->ro_rt->rt_ifa);
> if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL))
> - ifp = if_get(lo0ifidx);
> + ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
> else
> ifp = if_get(ro->ro_rt->rt_ifidx);
> if (ifp == NULL) {
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 ip6_input.c
> --- netinet6/ip6_input.c 24 Aug 2016 09:41:12 -0000 1.168
> +++ netinet6/ip6_input.c 2 Nov 2016 16:27:14 -0000
> @@ -211,7 +211,10 @@ ip6_input(struct mbuf *m)
> } else {
> if (m->m_next) {
> if (m->m_flags & M_LOOP) {
> - ip6stat.ip6s_m2m[lo0ifidx]++; /*XXX*/
> + int ifidx;
> +
> + ifidx = rtable_loindex(m->m_pkthdr.ph_rtableid);
> + ip6stat.ip6s_m2m[ifidx]++;
> } else if (ifp->if_index < nitems(ip6stat.ip6s_m2m))
> ip6stat.ip6s_m2m[ifp->if_index]++;
> else
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
> retrieving revision 1.216
> diff -u -p -r1.216 ip6_output.c
> --- netinet6/ip6_output.c 19 Sep 2016 18:09:09 -0000 1.216
> +++ netinet6/ip6_output.c 2 Nov 2016 12:10:02 -0000
> @@ -460,7 +460,7 @@ reroute:
> goto bad;
> }
> if (ISSET(rt->rt_flags, RTF_LOCAL))
> - ifp = if_get(lo0ifidx);
> + ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid));
> else
> ifp = if_get(rt->rt_ifidx);
> } else {
> Index: sys/socket.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/socket.h,v
> retrieving revision 1.92
> diff -u -p -r1.92 socket.h
> --- sys/socket.h 28 Sep 2016 18:50:20 -0000 1.92
> +++ sys/socket.h 2 Nov 2016 16:18:41 -0000
> @@ -143,7 +143,9 @@ struct splice {
> /*
> * Maximum number of alternate routing tables
> */
> -#define RT_TABLEID_MAX 255
> +#define RT_TABLEID_MAX 255
> +#define RT_TABLEID_BITS 8
> +#define RT_TABLEID_MASK 0xff
>
> #endif /* __BSD_VISIBLE */
>
>
--
The Moon is Waxing Crescent (11% of Full)