Hello,
Attached patch is a draft so that Babel can be configured to set the 'src' field on routes it installs. It does what we need, but it would be less ugly if the 'in' filter were given a 'src <ip>' action (instead of a command-line option). In our setup, we are using Babel to build a kind of VPN, with the following properties: - we were given a /48 for our machines, e.g. 2001:db8:42::/48 - each node gets a /64 from this /48 - routing is done by Babel - when 2 nodes are not in the same LAN, they communicate through a tunnel (tap interface), so Babel actually runs on several interfaces: a LAN one and several taps - one of the nodes acts as a gateway to the internet But some machines already have good IPv6 connectivity with their FAI box so for performance reasons, we'd like them to still use it when possible. OTOH, we have machines that would share their /64 with other machines (using radvd/dhcpd6). Let's see our most complex example: - sysctl use_tempaddr = 2 - eth0: linked to the FAI box - 2001:db8:1:3::5/64 scope global temporary dynamic - with a default route via fe80::1 - eth1: - 2001:db8:42:2::1/64 scope global - radvd/dhcpd6 to other machines - several tap interfaces for tunnels - Babel running on eth0 and taps: redistribute local deny redistribute ip 2001:db8:42:2::/64 eq 64 redistribute deny Here is how one would configure routes manually: # VPN 2001:db8:42:0::/64 src 2001:db8:42:2::1 via fe80::2 dev tapA 2001:db8:42:1::/64 src 2001:db8:42:2::1 via fe80::3 dev eth0 2001:db8:42:2::/64 dev eth1 2001:db8:42:3::/64 src 2001:db8:42:2::1 via fe80::2 dev tapA 2001:db8:42:4::/64 src 2001:db8:42:2::1 via fe80::4 dev tapB ... 
unreachable 2001:db8:42::/48 default from 2001:db8:42::/48 via fe80::2 dev tapA # FAI 2001:db8:1:3::/64 dev eth0 default via fe80::1 dev eth0 Testing: - ip route get 2001:db8:42:4::1 via fe80::4 dev tapB src 2001:db8:42:2::1 - ip route get ipv6.google.com via fe80::1 dev eth0 src 2001:db8:1:3::5 - ip route get ipv6.google.com from 2001:db8:42:4::1 via fe80::2 dev tapA * Why 'src' ? Because most of the time the VPN IP is not on the outgoing interface, so the kernel may select the FAI one. Here is what we could get if there were no preferred source [1]: - ip route get 2001:db8:42:4::1 via fe80::4 dev tapB src 2001:db8:1:3::5 (IOW, a ping to 2001:db8:42:4::1 would fail on the return path) Our current workaround, because Babel can't set the source address, is to use ip-addrlabel. This solves the case for interfaces without any IP (taps). But in the above example, this is not enough for eth0. * Why 'from' ? Routers may drop packets with an unrecognized source address. So when forwarding a packet from the VPN to the outside (i.e. the src IP is already set to something in 2001:db8:42::/48, unlike the dst IP), the packet must continue its route up to the VPN gateway, and only there will it be sent to the internet. Our current workaround is to use ip-rule (and tell Babel to use a separate table). This works perfectly but it would be simpler if Babel could set a 'from' address (e.g. with an additional 'from <prefix>' on the input filter). Regards, Julien [1] In fact, there's a rule saying the IP with the longest matching prefix should be selected, but this is the last one of the RFC, and it's not mandatory. A previous rule may decide to use a temporary address (hence, use_tempaddr=2).
>From 70a183bbc70126e02208de1c81936a110785396e Mon Sep 17 00:00:00 2001 From: Julien Muchembled <[email protected]> Date: Sun, 16 Sep 2012 17:48:34 +0200 Subject: [PATCH] RTA_PREFSRC --- babeld.c | 10 +++++++++- kernel.h | 1 + kernel_netlink.c | 45 +++++++++++++++++++++++---------------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/babeld.c b/babeld.c index ac3d4a2..07213ef 100644 --- a/babeld.c +++ b/babeld.c @@ -106,6 +106,7 @@ main(int argc, char **argv) void *vrc; unsigned int seed; struct interface *ifp; + unsigned char *addr; gettime(&now); @@ -123,7 +124,7 @@ main(int argc, char **argv) change_smoothing_half_life(4); while(1) { - opt = getopt(argc, argv, "m:p:h:H:i:k:A:suS:d:g:lwz:M:t:T:c:C:DL:I:F"); + opt = getopt(argc, argv, "m:p:h:H:i:k:A:suS:d:g:lwz:M:t:T:c:C:DL:I:FP:"); if(opt < 0) break; @@ -253,6 +254,13 @@ main(int argc, char **argv) case 'F': setup = 1; break; + case 'P': + addr = malloc(16); + rc = parse_address(optarg, addr, &i); + if(rc < 0) + goto usage; + preferred_source[i == AF_INET] = addr; + break; default: goto usage; } diff --git a/kernel.h b/kernel.h index c20afc0..7098767 100644 --- a/kernel.h +++ b/kernel.h @@ -43,6 +43,7 @@ struct kernel_route { #define CHANGE_ADDR (1 << 2) extern int export_table, import_table; +extern unsigned char *preferred_source[2]; int kernel_setup(int setup); int kernel_setup_socket(int setup); diff --git a/kernel_netlink.c b/kernel_netlink.c index aaf048f..736f50a 100644 --- a/kernel_netlink.c +++ b/kernel_netlink.c @@ -52,6 +52,7 @@ THE SOFTWARE. 
#include "interface.h" int export_table = -1, import_table = -1; +unsigned char *preferred_source[2] = {NULL, NULL}; static int old_forwarding = -1; static int old_ipv4_forwarding = -1; @@ -940,45 +941,45 @@ kernel_route(int operation, const unsigned char *dest, unsigned short plen, rta = RTM_RTA(rtm); - if(ipv4) { - rta = RTA_NEXT(rta, len); - rta->rta_len = RTA_LENGTH(sizeof(struct in_addr)); - rta->rta_type = RTA_DST; - memcpy(RTA_DATA(rta), dest + 12, sizeof(struct in_addr)); - } else { - rta = RTA_NEXT(rta, len); - rta->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); - rta->rta_type = RTA_DST; - memcpy(RTA_DATA(rta), dest, sizeof(struct in6_addr)); - } +#define ADD_IPARG(type, addr) \ + do if(ipv4) { \ + rta = RTA_NEXT(rta, len); \ + rta->rta_len = RTA_LENGTH(sizeof(struct in_addr)); \ + rta->rta_type = type; \ + memcpy(RTA_DATA(rta), addr + 12, sizeof(struct in_addr)); \ + } else { \ + rta = RTA_NEXT(rta, len); \ + rta->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); \ + rta->rta_type = type; \ + memcpy(RTA_DATA(rta), addr, sizeof(struct in6_addr)); \ + } while (0) + + ADD_IPARG(RTA_DST, dest); rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(int)); rta->rta_type = RTA_PRIORITY; if(metric < KERNEL_INFINITY) { + unsigned char *src; + *(int*)RTA_DATA(rta) = metric; rta = RTA_NEXT(rta, len); rta->rta_len = RTA_LENGTH(sizeof(int)); rta->rta_type = RTA_OIF; *(int*)RTA_DATA(rta) = ifindex; - if(ipv4) { - rta = RTA_NEXT(rta, len); - rta->rta_len = RTA_LENGTH(sizeof(struct in_addr)); - rta->rta_type = RTA_GATEWAY; - memcpy(RTA_DATA(rta), gate + 12, sizeof(struct in_addr)); - } else { - rta = RTA_NEXT(rta, len); - rta->rta_len = RTA_LENGTH(sizeof(struct in6_addr)); - rta->rta_type = RTA_GATEWAY; - memcpy(RTA_DATA(rta), gate, sizeof(struct in6_addr)); - } + ADD_IPARG(RTA_GATEWAY, gate); + src = preferred_source[ipv4]; + if(src) + ADD_IPARG(RTA_PREFSRC, src); } else { *(int*)RTA_DATA(rta) = -1; } buf.nh.nlmsg_len = (char*)rta + rta->rta_len - buf.raw; +#undef 
ADD_IPARG + return netlink_talk(&buf.nh); } -- 1.7.10.4.827.g28bab75.dirty
_______________________________________________ Babel-users mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/babel-users

