On 5/16/23 07:33, Nobuhiro MIKI wrote: > It supports flowlabel based load balancing by controlling the flowlabel > of outer IPv6 header, which is already implemented in Linux kernel as > seg6_flowlabel sysctl [1]. > > [1]: https://docs.kernel.org/networking/seg6-sysctl.html > > Signed-off-by: Nobuhiro MIKI <[email protected]> > --- > include/linux/openvswitch.h | 3 +- > lib/netdev-native-tnl.c | 23 ++++++++++- > lib/netdev-vport.c | 8 ++++ > lib/netdev.h | 12 ++++++ > tests/tunnel-push-pop-ipv6.at | 72 +++++++++++++++++++++++++++++++++++ > vswitchd/vswitch.xml | 26 +++++++++++++ > 6 files changed, 141 insertions(+), 3 deletions(-) > > diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h > index e305c331516b..278829cfa826 100644 > --- a/include/linux/openvswitch.h > +++ b/include/linux/openvswitch.h > @@ -855,7 +855,8 @@ struct ovs_action_push_tnl { > odp_port_t tnl_port; > odp_port_t out_port; > uint32_t header_len; > - uint32_t tnl_type; /* For logging. */ > + uint32_t tnl_type; /* For logging. */ > + uint32_t srv6_flowlabel; /* Only for SRv6. */ > uint32_t header[TNL_PUSH_HEADER_SIZE / 4]; > }; > #endif > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c > index db1c4c6d9bfc..796169fe43ac 100644 > --- a/lib/netdev-native-tnl.c > +++ b/lib/netdev-native-tnl.c > @@ -910,6 +910,7 @@ netdev_srv6_build_header(const struct netdev *netdev, > } > > data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen; > + data->srv6_flowlabel = tnl_cfg->srv6_flowlabel; > data->tnl_type = OVS_VPORT_TYPE_SRV6; > out: > ovs_mutex_unlock(&dev->mutex); > @@ -922,10 +923,28 @@ netdev_srv6_push_header(const struct netdev *netdev > OVS_UNUSED, > struct dp_packet *packet, > const struct ovs_action_push_tnl *data) > { > + struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(packet); > + ovs_be32 ipv6_label = 0; > int ip_tot_size; > + uint32_t flow; > > - netdev_tnl_push_ip_header(packet, data->header, data->header_len, > - &ip_tot_size, 0); > + switch (data->srv6_flowlabel) {
I understand why you added a new filed into the ovs_action_push_tnl structure, but I'm not sure we should do that. If you'll look at the GRE seqno, for exmaple, you'll see that we're accessing it via the original tnl_cfg structure stored in netdev. The problem is that it's not really thread safe, as I pointed out in review for the previous version. I went ahead and tried to fix the thread-safety issues. Patch set posted here: https://patchwork.ozlabs.org/project/openvswitch/list/?series=355820&state=* After these changes you should be able to directly use tunnel config like this: switch (netdev_get_tunnel_config(netdev)->srv6_flowlabel) { And it's going to be safe to do so. And we'll not need to change the action structure. What do you think? > + case SRV6_FLOWLABEL_COPY: > + flow = ntohl(get_16aligned_be32(&ip6->ip6_flow)); > + ipv6_label = (flow >> 28) == 6 ? htonl(flow & IPV6_LABEL_MASK) : 0; > + break; > + > + case SRV6_FLOWLABEL_ZERO: > + ipv6_label = 0; > + break; > + > + case SRV6_FLOWLABEL_COMPUTE: > + ipv6_label = htonl(dp_packet_get_rss_hash(packet) & IPV6_LABEL_MASK); > + break; > + } > + > + netdev_tnl_push_ip_header(packet, data->header, > + data->header_len, &ip_tot_size, ipv6_label); > } > > struct dp_packet * > diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c > index 663ee8606c3b..2141621cf23e 100644 > --- a/lib/netdev-vport.c > +++ b/lib/netdev-vport.c > @@ -792,6 +792,14 @@ set_tunnel_config(struct netdev *dev_, const struct smap > *args, char **errp) > name, node->value); > break; > } > + } else if (!strcmp(node->key, "srv6_flowlabel")) { > + if (!strcmp(node->value, "zero")) { > + tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_ZERO; > + } else if (!strcmp(node->value, "compute")) { > + tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COMPUTE; > + } else { > + tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COPY; > + } > } else if (!strcmp(node->key, "payload_type")) { > if (!strcmp(node->value, "mpls")) { > tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS); > diff --git a/lib/netdev.h b/lib/netdev.h > index ff207f56c28c..58a438c8347c 100644 > --- a/lib/netdev.h > +++ b/lib/netdev.h > @@ -97,6 +97,17 @@ enum netdev_pt_mode { > NETDEV_PT_LEGACY_L3, > }; > > +enum netdev_srv6_flowlabel { > + /* Copy the flowlabel from inner packet. */ > + SRV6_FLOWLABEL_COPY, > + > + /* Simply set flowlabel to 0. */ > + SRV6_FLOWLABEL_ZERO, > + > + /* Set RSS hash of inner pakcet to flowlabel. */ I'd avoid using 'RSS' in the documentation. It's an implementation detail that can be changed. What's importan here is that it's a hash over L3/L4 fields of the inner packet. > + SRV6_FLOWLABEL_COMPUTE, > +}; > + > /* Configuration specific to tunnels. */ > struct netdev_tunnel_config { > ovs_be64 in_key; > @@ -144,6 +155,7 @@ struct netdev_tunnel_config { > uint8_t srv6_num_segs; > #define SRV6_MAX_SEGS 6 > struct in6_addr srv6_segs[SRV6_MAX_SEGS]; > + enum netdev_srv6_flowlabel srv6_flowlabel; > }; > > void netdev_run(void); > diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at > index e300fe3a0d26..35d56593d37d 100644 > --- a/tests/tunnel-push-pop-ipv6.at > +++ b/tests/tunnel-push-pop-ipv6.at > @@ -1,5 +1,77 @@ > AT_BANNER([tunnel_push_pop_ipv6]) > > +AT_SETUP([tunnel_push_pop_ipv6 - srv6]) > + > +OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy > ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00 options:pcap=p0.pcap]) > +AT_CHECK([ovs-vsctl add-br int-br1 -- set bridge int-br1 > datapath_type=dummy], [0]) > +AT_CHECK([ovs-vsctl add-br int-br2 -- set bridge int-br2 > datapath_type=dummy], [0]) > +AT_CHECK([ovs-vsctl add-br int-br3 -- set bridge int-br3 > datapath_type=dummy], [0]) > +AT_CHECK([ovs-vsctl add-port int-br1 t1 -- set Interface t1 type=srv6 \ > + options:remote_ip=2001:cafe::91 ofport_request=2 \ > + options:srv6_flowlabel=copy \ > + ], [0]) > +AT_CHECK([ovs-vsctl add-port int-br2 t2 -- set Interface t2 type=srv6 \ > + options:remote_ip=2001:cafe::92 ofport_request=3 \ > + options:srv6_flowlabel=zero \ > + ], [0]) > +AT_CHECK([ovs-vsctl add-port int-br3 t3 -- set Interface t3 type=srv6 \ > + options:remote_ip=2001:cafe::93 ofport_request=4 \ > + options:srv6_flowlabel=compute \ > + ], [0]) > + > +dnl First setup dummy interface IP address, then add the route > +dnl so that tnl-port table can get valid IP address for the device. > +AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/24], [0], [OK > +]) > +AT_CHECK([ovs-appctl ovs/route/add 2001:cafe::0/24 br0], [0], [OK > +]) > +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::91 aa:55:aa:55:00:01], > [0], [OK > +]) > +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::92 aa:55:aa:55:00:02], > [0], [OK > +]) > +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::93 aa:55:aa:55:00:03], > [0], [OK > +]) > +AT_CHECK([ovs-ofctl add-flow br0 action=1]) > +AT_CHECK([ovs-ofctl add-flow int-br1 action=2]) > +AT_CHECK([ovs-ofctl add-flow int-br2 action=3]) > +AT_CHECK([ovs-ofctl add-flow int-br3 action=4]) > + > +dnl Check "srv6_flowlabel=copy". > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 > 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 > 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 > 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=47,tclass=0x0,hlimit=64)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 > 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=47,tclass=0x0,hlimit=64)']) > +AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o > 'ipv6_label=0x[[0-9a-f]]*'], [0], [dnl > +ipv6_label=0x00000 > +ipv6_label=0x00000 > +ipv6_label=0x00002 > +ipv6_label=0x00003 > +]) > + > +dnl Check "srv6_flowlabel=zero". > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 > 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 > 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 > 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=47,tclass=0x0,hlimit=64)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 > 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=47,tclass=0x0,hlimit=64)']) > +AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o > 'ipv6_label=0x[[0-9a-f]]*'], [0], [dnl > +ipv6_label=0x00000 > +ipv6_label=0x00000 > +ipv6_label=0x00000 > +ipv6_label=0x00000 > +]) > + > +dnl dnl Check "srv6_flowlabel=compute". > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 > 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 > 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 > 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=47,tclass=0x0,hlimit=64)']) > +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 > 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=47,tclass=0x0,hlimit=64)']) > +AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o > 'ipv6_label=0x[[0-9a-f]]*'| sort | uniq -c | wc -l], [0], [dnl > +4 So, you're sending 4 different packets and checking that they have different labels. However, you're not testing that two packets from the same flow have the same hash. Might be good to add a check like this. > +]) > + > +OVS_VSWITCHD_STOP > +AT_CLEANUP > + > AT_SETUP([tunnel_push_pop_ipv6 - ip6gre]) > > OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy > ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index edb5eafa04c3..8bcc361e8c62 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -3287,6 +3287,32 @@ > <ref column="options" key="remote_ip"/>. > </p> > </column> > + <column name="options" key="srv6_flowlabel" > + type='{"type": "string", > + "enum": ["set", ["zero", "copy", "compute"]]}'> > + <p> > + Optional. > + This option controls how flowlabel in outer IPv6 header is > + configured. It gives the benefit of IPv6 flow label based > + load balancing, which is supported by some popular vendor > + appliances. Like net.ipv6.seg6_flowlabel sysconfig, it is > + one of the three values below: > + </p> > + <ul> > + <li> > + By default, or if this option is <code>copy</code>, copy the > + flowlabel of inner IPv6 header to the flowlabel of outer IPv6 > + header. If inner header is not IPv6, it is set to 0. > + </li> > + <li> > + If this option is <code>zero</code>, simply set flowlabel to 0. > + </li> > + <li> > + If this option is <code>compute</code>, set RSS hash of inner > + packet to flowlabel. Same here. We shouldn't mention RSS, we should say that it's a hash over L3/L4 fields of the inner packet. > + </li> > + </ul> > + </column> > </group> > > <group title="Patch Options"> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
