It supports flowlabel based load balancing by controlling the flowlabel of outer IPv6 header, which is already implemented in Linux kernel as seg6_flowlabel sysctl [1].
[1]: https://docs.kernel.org/networking/seg6-sysctl.html Signed-off-by: Nobuhiro MIKI <[email protected]> --- lib/flow.c | 24 +++++++++++ lib/flow.h | 1 + lib/netdev-native-tnl.c | 22 +++++++++- lib/netdev-vport.c | 8 ++++ lib/netdev.h | 12 ++++++ tests/tunnel-push-pop-ipv6.at | 79 +++++++++++++++++++++++++++++++++++ vswitchd/vswitch.xml | 29 +++++++++++++ 7 files changed, 174 insertions(+), 1 deletion(-) diff --git a/lib/flow.c b/lib/flow.c index 9501a259e9d4..f27ec4795bc7 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -2734,6 +2734,30 @@ flow_hash_in_wildcards(const struct flow *flow, return hash_finish(hash, 8 * FLOW_U64S); } +uint32_t +flow_hash_srv6_flowlabel(const struct flow *flow, uint32_t basis) +{ + uint32_t hash = basis; + + if (flow->dl_type == htons(ETH_TYPE_IPV6)) { + const uint64_t *flow_u64 = (const uint64_t *) flow; + int ofs = offsetof(struct flow, ipv6_src) / 8; + int end = ofs + 2 * sizeof flow->ipv6_src / 8; + + for (;ofs < end; ofs++) { + hash = hash_add64(hash, flow_u64[ofs]); + } + + hash = hash_add(hash, flow->nw_proto); + hash = hash_add(hash, (OVS_FORCE uint32_t) flow->ipv6_label); + } else if (flow->dl_type == htons(ETH_TYPE_IP) + || flow->dl_type == htons(ETH_TYPE_ARP)) { + hash = flow_hash_5tuple(flow, basis); + } + + return hash_finish(hash, 42) & IPV6_LABEL_MASK; /* Arbitrary number. */ +} + /* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an * OpenFlow 1.0 "dl_vlan" value: * diff --git a/lib/flow.h b/lib/flow.h index a9d026e1ce3b..7b8ef5164465 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -258,6 +258,7 @@ bool flow_hash_fields_valid(enum nx_hash_fields); uint32_t flow_hash_in_wildcards(const struct flow *, const struct flow_wildcards *, uint32_t basis); +uint32_t flow_hash_srv6_flowlabel(const struct flow *, uint32_t basis); bool flow_equal_except(const struct flow *a, const struct flow *b, const struct flow_wildcards *); diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index 55e1bd567fa1..18bd9df57175 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -856,6 +856,7 @@ netdev_srv6_build_header(const struct netdev *netdev, struct netdev_tunnel_config *tnl_cfg; const struct in6_addr *segs; struct srv6_base_hdr *srh; + uint32_t ipv6_label = 0; struct in6_addr *s; ovs_be16 dl_type; int err = 0; @@ -882,7 +883,26 @@ netdev_srv6_build_header(const struct netdev *netdev, goto out; } - srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING, 0); + switch (tnl_cfg->srv6_flowlabel) { + case SRV6_FLOWLABEL_COPY: + ipv6_label = ntohl(params->flow->ipv6_label); + break; + + case SRV6_FLOWLABEL_ZERO: + ipv6_label = 0; + break; + + case SRV6_FLOWLABEL_COMPUTE: + ipv6_label = flow_hash_srv6_flowlabel(params->flow, 0); + break; + + default: + err = EINVAL; + goto out; + } + + srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING, + ipv6_label); srh->rt_hdr.segments_left = nr_segs - 1; srh->rt_hdr.type = IPV6_SRCRT_TYPE_4; srh->rt_hdr.hdrlen = 2 * nr_segs; diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 663ee8606c3b..2141621cf23e 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -792,6 +792,14 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) name, node->value); break; } + } else if (!strcmp(node->key, "srv6_flowlabel")) { + if (!strcmp(node->value, "zero")) { + tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_ZERO; + } else if (!strcmp(node->value, "compute")) { + tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COMPUTE; + } else { + tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COPY; + } } else if (!strcmp(node->key, "payload_type")) { if (!strcmp(node->value, "mpls")) { tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS); diff --git a/lib/netdev.h b/lib/netdev.h index ff207f56c28c..743a56ca1629 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -97,6 +97,17 @@ enum netdev_pt_mode { NETDEV_PT_LEGACY_L3, }; +enum netdev_srv6_flowlabel { + /* Copy the flowlabel from inner packet. */ + SRV6_FLOWLABEL_COPY, + + /* Simply set flowlabel to 0. */ + SRV6_FLOWLABEL_ZERO, + + /* Calculate a hash for some fields and set the result to flowlabel. */ + SRV6_FLOWLABEL_COMPUTE, +}; + /* Configuration specific to tunnels. */ struct netdev_tunnel_config { ovs_be64 in_key; @@ -144,6 +155,7 @@ struct netdev_tunnel_config { uint8_t srv6_num_segs; #define SRV6_MAX_SEGS 6 struct in6_addr srv6_segs[SRV6_MAX_SEGS]; + enum netdev_srv6_flowlabel srv6_flowlabel; }; void netdev_run(void); diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at index e300fe3a0d26..33edc8319eed 100644 --- a/tests/tunnel-push-pop-ipv6.at +++ b/tests/tunnel-push-pop-ipv6.at @@ -1,5 +1,84 @@ AT_BANNER([tunnel_push_pop_ipv6]) +AT_SETUP([tunnel_push_pop_ipv6 - srv6]) + +OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) +AT_CHECK([ovs-vsctl add-br int-br1 -- set bridge int-br1 datapath_type=dummy], [0]) +AT_CHECK([ovs-vsctl add-br int-br2 -- set bridge int-br2 datapath_type=dummy], [0]) +AT_CHECK([ovs-vsctl add-br int-br3 -- set bridge int-br3 datapath_type=dummy], [0]) +AT_CHECK([ovs-vsctl add-port int-br1 t1 -- set Interface t1 type=srv6 \ + options:remote_ip=2001:cafe::91 ofport_request=2 \ + options:srv6_flowlabel=copy \ + ], [0]) +AT_CHECK([ovs-vsctl add-port int-br2 t2 -- set Interface t2 type=srv6 \ + options:remote_ip=2001:cafe::92 ofport_request=3 \ + options:srv6_flowlabel=zero \ + ], [0]) +AT_CHECK([ovs-vsctl add-port int-br3 t3 -- set Interface t3 type=srv6 \ + options:remote_ip=2001:cafe::93 ofport_request=4 \ + options:srv6_flowlabel=compute \ + ], [0]) + +dnl First setup dummy interface IP address, then add the route +dnl so that tnl-port table can get valid IP address for the device. +AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/24], [0], [OK +]) +AT_CHECK([ovs-appctl ovs/route/add 2001:cafe::0/24 br0], [0], [OK +]) +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::91 aa:55:aa:55:00:01], [0], [OK +]) +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::92 aa:55:aa:55:00:02], [0], [OK +]) +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::93 aa:55:aa:55:00:03], [0], [OK +]) +AT_CHECK([ovs-ofctl add-flow br0 action=1]) +AT_CHECK([ovs-ofctl add-flow int-br1 action=2]) +AT_CHECK([ovs-ofctl add-flow int-br2 action=3]) +AT_CHECK([ovs-ofctl add-flow int-br3 action=4]) + +dnl Check "srv6_flowlabel=copy". +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=12345,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout]) +cat stdout +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=12345 +]) + +dnl Check "srv6_flowlabel=zero". +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=12345,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout]) +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=0 +]) + +dnl Check "srv6_flowlabel=compute" for IPv4 in IPv6 tunnels. +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=944785 +]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=6,tos=0,ttl=64,frag=no),tcp(src=800,dst=900)'], [0], [stdout]) +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=772289 +]) + +dnl Check "srv6_flowlabel=compute" for IPv6 in IPv6 tunnels. +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=0,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout]) +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=468935 +]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=12345,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout]) +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=1012207 +]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=0,proto=47,tclass=0x0,hlimit=64)'], [0], [stdout]) +AT_CHECK([tail -1 stdout | grep -o 'label=[[0-9]]*'], [0], [dnl +label=629290 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel_push_pop_ipv6 - ip6gre]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index edb5eafa04c3..7a0682503233 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3287,6 +3287,35 @@ <ref column="options" key="remote_ip"/>. </p> </column> + <column name="options" key="srv6_flowlabel" + type='{"type": "string", + "enum": ["set", ["zero", "copy", "compute"]]}'> + <p> + Optional. + This option controls how flowlabel in outer IPv6 header is + configured. This would give you the benefit of IPv6 flow label + based load balancing, which is supported by some popular vendor + appliances. You can choose from the following three types, as + in net.ipv6.seg6_flowlabel syscall. + </p> + <ul> + <li> + By default, or if this option is <code>copy</code>, copy the + flowlabel of inner IPv6 header to the flowlabel of outer IPv6 + header. If inner header is not IPv6, it is set to 0. + </li> + <li> + If this option is <code>zero</code>, simply set flowlabel to 0. + </li> + <li> + If this option is <code>compute</code>, calculate a hash for + some fields in inner header and set the result to flowlabel. + If inner packet is IPv6, src_ip, dst_ip, L4 proto, and + flowlabel are the targets of hash calculation. If it is IPv4, + src_ip, dst_ip, L4 proto, src_port, and dst_port are the targets. + </li> + </ul> + </column> </group> <group title="Patch Options"> -- 2.31.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
