OK. I think I understand the patch now. It makes OVS native tunneling honor tunnel-specified source addresses, in the same way that Linux kernel tunneling honors them.
I see one potential problem: it makes ovs-router.c #include <linux/rtnetlink.h>. This is a problem because ovs-router.c is used on Windows and BSD as well as on Linux, and those systems do not have rtnetlink.h or RTN_LOCAL. Would you please change the patch to avoid this problem? Thank you for the contribution. It will make OVS native tunneling better. On Sat, Feb 03, 2018 at 08:47:41AM +0800, wenxu wrote: > Hi ben, > > > This patch can be a bugfix. > The tunnel_src of packet maybe not the IP address set by gre port > options:local_ip > dpdk-br has two address 10.1.1.7/24 and 10.1.1.254/32 > Interface gre options:local_ip="10.1.1.254" > But the tunnel_src of tunnel packet is always 10.1.1.7 > > > Also It can be a new feature. > In the same case the kernel space Open vSwitch can send packet with > tunnel_src 10.1.1.254. > > > Why we need this? > In the High-availability cluster > server1 with IP 10.1.1.7/24 and a virtual IP 10.1.1.254 > server2 with IP 10.1.1.8/24 and a virtual IP 10.1.1.254 > > > 10.1.1.7 and 10.1.1.8 running for the bgp and 10.1.1.254 provide the > dataplane forward > > > > > At 2018-02-03 06:30:51, "Ben Pfaff" <[email protected]> wrote: > >Thank you for submitting a patch to improve Open vSwitch. I do not yet > >understand the purpose of this patch. Is it a bug fix or a new feature? > > > >Thanks, > > > >Ben. > > > >On Fri, Feb 02, 2018 at 02:08:52PM +0800, [email protected] wrote: > >> From: wenxu <[email protected]> > >> > >> native tunnel build tunnel with tun_src only from the route src and > >> not care about the options:local_ip. > >> Sometimes an virtual IP using for tun_src > >> dpdk-br: > >> inet 10.1.1.7/24 brd 10.1.1.255 scope global dpdk-br > >> inet 10.1.1.254/32 scope global dpdk-br > >> > >> Interface: gre options: {key=flow, local_ip="10.1.1.254", remote_ip=flow} > >> > >> the native tunnel always using 10.1.1.7 as the tunnel_src but not > >> 10.1.1.254. > >> > >> This patch made valid tun_src specified by flow-action or gre port options > >> can be used for tunnel_src of packet. It stores the rtm_type for each route > >> and improve the priority RTN_LOCAL type(higher then userdef route). > >> Like the kernel space when lookup the route, if there are tun_src specified > >> by flow-action or port options. Check the tun_src wheather is a local > >> address, then lookup the route. > >> > >> Signed-off-by: wenxu <[email protected]> > >> Signed-off-by: frank.zeng <[email protected]> > >> --- > >> lib/ovs-router.c | 38 +++++++++++++++++++++++++++++++------- > >> lib/ovs-router.h | 2 +- > >> lib/route-table.c | 10 ++++++++-- > >> ofproto/ofproto-dpif-sflow.c | 2 +- > >> ofproto/ofproto-dpif-xlate.c | 4 ++++ > >> 5 files changed, 45 insertions(+), 11 deletions(-) > >> > >> diff --git a/lib/ovs-router.c b/lib/ovs-router.c > >> index 0f1103b..e1375a3 100644 > >> --- a/lib/ovs-router.c > >> +++ b/lib/ovs-router.c > >> @@ -29,6 +29,7 @@ > >> #include <stdlib.h> > >> #include <string.h> > >> #include <unistd.h> > >> +#include <linux/rtnetlink.h> > >> > >> #include "classifier.h" > >> #include "command-line.h" > >> @@ -61,6 +62,7 @@ struct ovs_router_entry { > >> struct in6_addr nw_addr; > >> struct in6_addr src_addr; > >> uint8_t plen; > >> + uint8_t rtm_type; > >> uint8_t priority; > >> uint32_t mark; > >> }; > >> @@ -97,13 +99,28 @@ ovs_router_lookup(uint32_t mark, const struct in6_addr > >> *ip6_dst, > >> const struct cls_rule *cr; > >> struct flow flow = {.ipv6_dst = *ip6_dst, .pkt_mark = mark}; > >> > >> + if (src && ipv6_addr_is_set(src)) { > >> + const struct cls_rule *cr_src; > >> + struct flow flow_src = {.ipv6_dst = *src, .pkt_mark = mark}; > >> + > >> + cr_src = classifier_lookup(&cls, OVS_VERSION_MAX, &flow_src, > >> NULL); > >> + if (cr_src) { > >> + struct ovs_router_entry *p_src = > >> ovs_router_entry_cast(cr_src); > >> + if (p_src->rtm_type != RTN_LOCAL) { > >> + return false; > >> + } > >> + } else { > >> + return false; > >> + } > >> + } > >> + > >> cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL); > >> if (cr) { > >> struct ovs_router_entry *p = ovs_router_entry_cast(cr); > >> > >> ovs_strlcpy(output_bridge, p->output_bridge, IFNAMSIZ); > >> *gw = p->gw; > >> - if (src) { > >> + if (src && !ipv6_addr_is_set(src)) { > >> *src = p->src_addr; > >> } > >> return true; > >> @@ -184,7 +201,7 @@ out: > >> } > >> > >> static int > >> -ovs_router_insert__(uint32_t mark, uint8_t priority, > >> +ovs_router_insert__(uint32_t mark, uint8_t priority, uint8_t rtm_type, > >> const struct in6_addr *ip6_dst, > >> uint8_t plen, const char output_bridge[], > >> const struct in6_addr *gw) > >> @@ -204,6 +221,7 @@ ovs_router_insert__(uint32_t mark, uint8_t priority, > >> p->mark = mark; > >> p->nw_addr = match.flow.ipv6_dst; > >> p->plen = plen; > >> + p->rtm_type = rtm_type; > >> p->priority = priority; > >> err = get_src_addr(ip6_dst, output_bridge, &p->src_addr); > >> if (err && ipv6_addr_is_set(gw)) { > >> @@ -236,9 +254,10 @@ ovs_router_insert__(uint32_t mark, uint8_t priority, > >> > >> void > >> ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, uint8_t > >> plen, > >> - const char output_bridge[], const struct in6_addr *gw) > >> + uint8_t rtm_type, const char output_bridge[], > >> + const struct in6_addr *gw) > >> { > >> - ovs_router_insert__(mark, plen, ip_dst, plen, output_bridge, gw); > >> + ovs_router_insert__(mark, plen, rtm_type, ip_dst, plen, > >> output_bridge, gw); > >> } > >> > >> static void > >> @@ -345,7 +364,7 @@ ovs_router_add(struct unixctl_conn *conn, int argc, > >> } > >> } > >> > >> - err = ovs_router_insert__(mark, plen + 32, &ip6, plen, argv[2], &gw6); > >> + err = ovs_router_insert__(mark, plen + 32, RTN_UNICAST, &ip6, plen, > >> argv[2], &gw6); > >> if (err) { > >> unixctl_command_reply_error(conn, "Error while inserting route."); > >> } else { > >> @@ -402,7 +421,12 @@ ovs_router_show(struct unixctl_conn *conn, int argc > >> OVS_UNUSED, > >> ipv6_format_mapped(&rt->nw_addr, &ds); > >> plen = rt->plen; > >> if (IN6_IS_ADDR_V4MAPPED(&rt->nw_addr)) { > >> - plen -= 96; > >> + uint8_t plen_off = 96; > >> + > >> + if (rt->rtm_type == RTN_LOCAL) { > >> + plen_off += 64; > >> + } > >> + plen -= plen_off; > >> } > >> ds_put_format(&ds, "/%"PRIu8, plen); > >> if (rt->mark) { > >> @@ -426,7 +450,7 @@ static void > >> ovs_router_lookup_cmd(struct unixctl_conn *conn, int argc, > >> const char *argv[], void *aux OVS_UNUSED) > >> { > >> - struct in6_addr gw, src; > >> + struct in6_addr gw, src = in6addr_any; > >> char iface[IFNAMSIZ]; > >> struct in6_addr ip6; > >> unsigned int plen; > >> diff --git a/lib/ovs-router.h b/lib/ovs-router.h > >> index b55b1a5..f41771b 100644 > >> --- a/lib/ovs-router.h > >> +++ b/lib/ovs-router.h > >> @@ -31,7 +31,7 @@ bool ovs_router_lookup(uint32_t mark, const struct > >> in6_addr *ip_dst, > >> struct in6_addr *src, struct in6_addr *gw); > >> void ovs_router_init(void); > >> void ovs_router_insert(uint32_t mark, const struct in6_addr *ip_dst, > >> - uint8_t plen, > >> + uint8_t plen, uint8_t rtm_type, > >> const char output_bridge[], const struct in6_addr > >> *gw); > >> void ovs_router_flush(void); > >> #ifdef __cplusplus > >> diff --git a/lib/route-table.c b/lib/route-table.c > >> index 97a0be5..e75d24e 100644 > >> --- a/lib/route-table.c > >> +++ b/lib/route-table.c > >> @@ -47,6 +47,7 @@ VLOG_DEFINE_THIS_MODULE(route_table); > >> struct route_data { > >> /* Copied from struct rtmsg. */ > >> unsigned char rtm_dst_len; > >> + unsigned char rtm_type; > >> > >> /* Extracted from Netlink attributes. */ > >> struct in6_addr rta_dst; /* 0 if missing. */ > >> @@ -228,6 +229,7 @@ route_table_parse(struct ofpbuf *buf, struct > >> route_table_msg *change) > >> if (parsed) { > >> const struct nlmsghdr *nlmsg; > >> int rta_oif; /* Output interface index. */ > >> + uint8_t dst_len_off = 96; > >> > >> nlmsg = buf->data; > >> > >> @@ -243,7 +245,11 @@ route_table_parse(struct ofpbuf *buf, struct > >> route_table_msg *change) > >> change->relevant = false; > >> } > >> change->nlmsg_type = nlmsg->nlmsg_type; > >> - change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? 96 : 0); > >> + if (rtm->rtm_type == RTN_LOCAL) { > >> + dst_len_off += 64; > >> + } > >> + change->rd.rtm_dst_len = rtm->rtm_dst_len + (ipv4 ? dst_len_off : > >> 0); > >> + change->rd.rtm_type = rtm->rtm_type; > >> if (attrs[RTA_OIF]) { > >> rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); > >> > >> @@ -306,7 +312,7 @@ route_table_handle_msg(const struct route_table_msg > >> *change) > >> const struct route_data *rd = &change->rd; > >> > >> ovs_router_insert(rd->mark, &rd->rta_dst, rd->rtm_dst_len, > >> - rd->ifname, &rd->rta_gw); > >> + rd->rtm_type, rd->ifname, &rd->rta_gw); > >> } > >> } > >> > >> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c > >> index 60e1b4e..0a79623 100644 > >> --- a/ofproto/ofproto-dpif-sflow.c > >> +++ b/ofproto/ofproto-dpif-sflow.c > >> @@ -467,7 +467,7 @@ sflow_choose_agent_address(const char *agent_device, > >> > >> if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sa.ss) > >> && sa.ss.ss_family == AF_INET) { > >> - struct in6_addr addr6, src, gw; > >> + struct in6_addr addr6, gw, src = in6addr_any; > >> > >> in6_addr_set_mapped_ipv4(&addr6, sa.sin.sin_addr.s_addr); > >> /* sFlow only supports target in default routing table with > >> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > >> index cc450a8..4e62dab 100644 > >> --- a/ofproto/ofproto-dpif-xlate.c > >> +++ b/ofproto/ofproto-dpif-xlate.c > >> @@ -3319,6 +3319,10 @@ native_tunnel_output(struct xlate_ctx *ctx, const > >> struct xport *xport, > >> memcpy(&old_base_flow, &ctx->base_flow, sizeof old_base_flow); > >> memcpy(&old_flow, &ctx->xin->flow, sizeof old_flow); > >> > >> + if (flow->tunnel.ip_src) { > >> + in6_addr_set_mapped_ipv4(&s_ip6, flow->tunnel.ip_src); > >> + } > >> + > >> err = tnl_route_lookup_flow(ctx, flow, &d_ip6, &s_ip6, &out_dev); > >> if (err) { > >> xlate_report(ctx, OFT_WARN, "native tunnel routing failed"); > >> -- > >> 1.7.1 > >> > >> > >> _______________________________________________ > >> dev mailing list > >> [email protected] > >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
