On 5/20/23 02:34, Ilya Maximets wrote:
> On 5/16/23 07:33, Nobuhiro MIKI wrote:
>> It supports flowlabel based load balancing by controlling the flowlabel
>> of outer IPv6 header, which is already implemented in Linux kernel as
>> seg6_flowlabel sysctl [1].
>>
>> [1]: https://docs.kernel.org/networking/seg6-sysctl.html
>>
>> Signed-off-by: Nobuhiro MIKI <[email protected]>
>> ---
>>  include/linux/openvswitch.h   |  3 +-
>>  lib/netdev-native-tnl.c       | 23 ++++++++++-
>>  lib/netdev-vport.c            |  8 ++++
>>  lib/netdev.h                  | 12 ++++++
>>  tests/tunnel-push-pop-ipv6.at | 72 +++++++++++++++++++++++++++++++++++
>>  vswitchd/vswitch.xml          | 26 +++++++++++++
>>  6 files changed, 141 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
>> index e305c331516b..278829cfa826 100644
>> --- a/include/linux/openvswitch.h
>> +++ b/include/linux/openvswitch.h
>> @@ -855,7 +855,8 @@ struct ovs_action_push_tnl {
>>      odp_port_t tnl_port;
>>      odp_port_t out_port;
>>      uint32_t header_len;
>> -    uint32_t tnl_type;     /* For logging. */
>> +    uint32_t tnl_type;       /* For logging. */
>> +    uint32_t srv6_flowlabel; /* Only for SRv6. */
>>      uint32_t header[TNL_PUSH_HEADER_SIZE / 4];
>>  };
>>  #endif
>> diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
>> index db1c4c6d9bfc..796169fe43ac 100644
>> --- a/lib/netdev-native-tnl.c
>> +++ b/lib/netdev-native-tnl.c
>> @@ -910,6 +910,7 @@ netdev_srv6_build_header(const struct netdev *netdev,
>>      }
>>  
>>      data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
>> +    data->srv6_flowlabel = tnl_cfg->srv6_flowlabel;
>>      data->tnl_type = OVS_VPORT_TYPE_SRV6;
>>  out:
>>      ovs_mutex_unlock(&dev->mutex);
>> @@ -922,10 +923,28 @@ netdev_srv6_push_header(const struct netdev *netdev 
>> OVS_UNUSED,
>>                          struct dp_packet *packet,
>>                          const struct ovs_action_push_tnl *data)
>>  {
>> +    struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(packet);
>> +    ovs_be32 ipv6_label = 0;
>>      int ip_tot_size;
>> +    uint32_t flow;
>>  
>> -    netdev_tnl_push_ip_header(packet, data->header, data->header_len,
>> -                              &ip_tot_size, 0);
>> +    switch (data->srv6_flowlabel) {
> 
> I understand why you added a new filed into the ovs_action_push_tnl structure,
> but I'm not sure we should do that.
> 
> If you'll look at the GRE seqno, for exmaple, you'll see that we're accessing
> it via the original tnl_cfg structure stored in netdev.  The problem is that
> it's not really thread safe, as I pointed out in review for the previous 
> version.
> I went ahead and tried to fix the thread-safety issues.  Patch set posted 
> here:
> 
>   https://patchwork.ozlabs.org/project/openvswitch/list/?series=355820&state=*
> 
> After these changes you should be able to directly use tunnel config like 
> this:
> 
>     switch (netdev_get_tunnel_config(netdev)->srv6_flowlabel) {
> 
> And it's going to be safe to do so.  And we'll not need to change the action
> structure.
> 
> What do you think?

One other option is to use the ipv6 label of the built header itself to 
communicate
the desired mode.   i.e. Write a enum value to the ipv6_label field during the 
header
build and check it here.

> 
> 
>> +    case SRV6_FLOWLABEL_COPY:
>> +        flow = ntohl(get_16aligned_be32(&ip6->ip6_flow));
>> +        ipv6_label = (flow >> 28) == 6 ? htonl(flow & IPV6_LABEL_MASK) : 0;
>> +        break;
>> +
>> +    case SRV6_FLOWLABEL_ZERO:
>> +        ipv6_label = 0;
>> +        break;
>> +
>> +    case SRV6_FLOWLABEL_COMPUTE:
>> +        ipv6_label = htonl(dp_packet_get_rss_hash(packet) & 
>> IPV6_LABEL_MASK);
>> +        break;
>> +    }
>> +
>> +    netdev_tnl_push_ip_header(packet, data->header,
>> +                              data->header_len, &ip_tot_size, ipv6_label);
>>  }
>>  
>>  struct dp_packet *
>> diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
>> index 663ee8606c3b..2141621cf23e 100644
>> --- a/lib/netdev-vport.c
>> +++ b/lib/netdev-vport.c
>> @@ -792,6 +792,14 @@ set_tunnel_config(struct netdev *dev_, const struct 
>> smap *args, char **errp)
>>                                name, node->value);
>>                  break;
>>              }
>> +        } else if (!strcmp(node->key, "srv6_flowlabel")) {
>> +            if (!strcmp(node->value, "zero")) {
>> +                tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_ZERO;
>> +            } else if (!strcmp(node->value, "compute")) {
>> +                tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COMPUTE;
>> +            } else {
>> +                tnl_cfg.srv6_flowlabel = SRV6_FLOWLABEL_COPY;
>> +            }
>>          } else if (!strcmp(node->key, "payload_type")) {
>>              if (!strcmp(node->value, "mpls")) {
>>                   tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS);
>> diff --git a/lib/netdev.h b/lib/netdev.h
>> index ff207f56c28c..58a438c8347c 100644
>> --- a/lib/netdev.h
>> +++ b/lib/netdev.h
>> @@ -97,6 +97,17 @@ enum netdev_pt_mode {
>>      NETDEV_PT_LEGACY_L3,
>>  };
>>  
>> +enum netdev_srv6_flowlabel {
>> +    /* Copy the flowlabel from inner packet. */
>> +    SRV6_FLOWLABEL_COPY,
>> +
>> +    /* Simply set flowlabel to 0. */
>> +    SRV6_FLOWLABEL_ZERO,
>> +
>> +    /* Set RSS hash of inner pakcet to flowlabel. */
> 
> I'd avoid using 'RSS' in the documentation.  It's an implementation
> detail that can be changed.  What's importan here is that it's a
> hash over L3/L4 fields of the inner packet.
> 
>> +    SRV6_FLOWLABEL_COMPUTE,
>> +};
>> +
>>  /* Configuration specific to tunnels. */
>>  struct netdev_tunnel_config {
>>      ovs_be64 in_key;
>> @@ -144,6 +155,7 @@ struct netdev_tunnel_config {
>>      uint8_t srv6_num_segs;
>>      #define SRV6_MAX_SEGS 6
>>      struct in6_addr srv6_segs[SRV6_MAX_SEGS];
>> +    enum netdev_srv6_flowlabel srv6_flowlabel;
>>  };
>>  
>>  void netdev_run(void);
>> diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
>> index e300fe3a0d26..35d56593d37d 100644
>> --- a/tests/tunnel-push-pop-ipv6.at
>> +++ b/tests/tunnel-push-pop-ipv6.at
>> @@ -1,5 +1,77 @@
>>  AT_BANNER([tunnel_push_pop_ipv6])
>>  
>> +AT_SETUP([tunnel_push_pop_ipv6 - srv6])
>> +
>> +OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy 
>> ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00 options:pcap=p0.pcap])
>> +AT_CHECK([ovs-vsctl add-br int-br1 -- set bridge int-br1 
>> datapath_type=dummy], [0])
>> +AT_CHECK([ovs-vsctl add-br int-br2 -- set bridge int-br2 
>> datapath_type=dummy], [0])
>> +AT_CHECK([ovs-vsctl add-br int-br3 -- set bridge int-br3 
>> datapath_type=dummy], [0])
>> +AT_CHECK([ovs-vsctl add-port int-br1 t1 -- set Interface t1 type=srv6 \
>> +                       options:remote_ip=2001:cafe::91 ofport_request=2 \
>> +                       options:srv6_flowlabel=copy \
>> +                       ], [0])
>> +AT_CHECK([ovs-vsctl add-port int-br2 t2 -- set Interface t2 type=srv6 \
>> +                       options:remote_ip=2001:cafe::92 ofport_request=3 \
>> +                       options:srv6_flowlabel=zero \
>> +                       ], [0])
>> +AT_CHECK([ovs-vsctl add-port int-br3 t3 -- set Interface t3 type=srv6 \
>> +                       options:remote_ip=2001:cafe::93 ofport_request=4 \
>> +                       options:srv6_flowlabel=compute \
>> +                       ], [0])
>> +
>> +dnl First setup dummy interface IP address, then add the route
>> +dnl so that tnl-port table can get valid IP address for the device.
>> +AT_CHECK([ovs-appctl netdev-dummy/ip6addr br0 2001:cafe::88/24], [0], [OK
>> +])
>> +AT_CHECK([ovs-appctl ovs/route/add 2001:cafe::0/24 br0], [0], [OK
>> +])
>> +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::91 aa:55:aa:55:00:01], 
>> [0], [OK
>> +])
>> +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::92 aa:55:aa:55:00:02], 
>> [0], [OK
>> +])
>> +AT_CHECK([ovs-appctl tnl/neigh/set br0 2001:cafe::93 aa:55:aa:55:00:03], 
>> [0], [OK
>> +])
>> +AT_CHECK([ovs-ofctl add-flow br0 action=1])
>> +AT_CHECK([ovs-ofctl add-flow int-br1 action=2])
>> +AT_CHECK([ovs-ofctl add-flow int-br2 action=3])
>> +AT_CHECK([ovs-ofctl add-flow int-br3 action=4])
>> +
>> +dnl Check "srv6_flowlabel=copy".
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 
>> 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 
>> 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 
>> 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=47,tclass=0x0,hlimit=64)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br1 
>> 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=47,tclass=0x0,hlimit=64)'])
>> +AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o 
>> 'ipv6_label=0x[[0-9a-f]]*'], [0], [dnl
>> +ipv6_label=0x00000
>> +ipv6_label=0x00000
>> +ipv6_label=0x00002
>> +ipv6_label=0x00003
>> +])
>> +
>> +dnl Check "srv6_flowlabel=zero".
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 
>> 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 
>> 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 
>> 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=47,tclass=0x0,hlimit=64)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br2 
>> 'in_port(3),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=47,tclass=0x0,hlimit=64)'])
>> +AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o 
>> 'ipv6_label=0x[[0-9a-f]]*'], [0], [dnl
>> +ipv6_label=0x00000
>> +ipv6_label=0x00000
>> +ipv6_label=0x00000
>> +ipv6_label=0x00000
>> +])
>> +
>> +dnl dnl Check "srv6_flowlabel=compute".
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 
>> 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 
>> 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 
>> 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::2,label=2,proto=47,tclass=0x0,hlimit=64)'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive int-br3 
>> 'in_port(4),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x86dd),ipv6(src=2001:beef::1,dst=2001:beef::3,label=3,proto=47,tclass=0x0,hlimit=64)'])
>> +AT_CHECK([ovs-ofctl parse-pcap p0.pcap | tail -n 4 | grep -o 
>> 'ipv6_label=0x[[0-9a-f]]*'| sort | uniq -c | wc -l], [0], [dnl
>> +4
> 
> So, you're sending 4 different packets and checking that they have
> different labels.  However, you're not testing that two packets
> from the same flow have the same hash.  Might be good to add a check
> like this.
> 
>> +])
>> +
>> +OVS_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>>  AT_SETUP([tunnel_push_pop_ipv6 - ip6gre])
>>  
>>  OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy 
>> ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00])
>> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
>> index edb5eafa04c3..8bcc361e8c62 100644
>> --- a/vswitchd/vswitch.xml
>> +++ b/vswitchd/vswitch.xml
>> @@ -3287,6 +3287,32 @@
>>             <ref column="options" key="remote_ip"/>.
>>          </p>
>>        </column>
>> +      <column name="options" key="srv6_flowlabel"
>> +              type='{"type": "string",
>> +                     "enum": ["set", ["zero", "copy", "compute"]]}'>
>> +        <p>
>> +          Optional.
>> +          This option controls how flowlabel in outer IPv6 header is
>> +          configured. It gives the benefit of IPv6 flow label based
>> +          load balancing, which is supported by some popular vendor
>> +          appliances. Like net.ipv6.seg6_flowlabel sysconfig, it is
>> +          one of the three values below:
>> +        </p>
>> +        <ul>
>> +          <li>
>> +            By default, or if this option is <code>copy</code>, copy the
>> +            flowlabel of inner IPv6 header to the flowlabel of outer IPv6
>> +            header. If inner header is not IPv6, it is set to 0.
>> +          </li>
>> +          <li>
>> +            If this option is <code>zero</code>, simply set flowlabel to 0.
>> +          </li>
>> +          <li>
>> +            If this option is <code>compute</code>, set RSS hash of inner
>> +            packet to flowlabel.
> 
> Same here.  We shouldn't mention RSS, we should say that it's a hash
> over L3/L4 fields of the inner packet.
> 
>> +          </li>
>> +        </ul>
>> +      </column>
>>      </group>
>>  
>>      <group title="Patch Options">
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to