[ovs-dev] [PATCH ovn] system-tests: Reduce flakiness of netcat UDP clients

2023-03-28 Thread Ales Musil
When we send a packet through UDP without other side
having open port, it usually replies with ICMP message.
netcat upon getting that error fails with:
"Ncat: Connection refused.". However, there is a race
because this message might arrive after the netcat client
is already done, so it will exit without any error.

To prevent that error ignore the return code, stdout and stderr
for those calls. It is fine to ignore because all those changed
instances are checking packet count after the calls so if the
netcat fails for other reasons it will be detected by the packet
count check.

Signed-off-by: Ales Musil 
---
 tests/system-ovn.at | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index bcd829c8b..939c2c3dc 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -4850,9 +4850,9 @@ NS_CHECK_EXEC([lsp], [tcpdump -l -nn -c 3 -i lsp 
${filter} > lsp.pcap 2>tcpdump_
 OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
 
 # Generate IPv4 UDP hairpin traffic.
-NS_CHECK_EXEC([lsp], [echo a | nc -u 88.88.88.88 4040 &], [0])
-NS_CHECK_EXEC([lsp], [echo a | nc -u 88.88.88.89 4040 &], [0])
-NS_CHECK_EXEC([lsp], [echo a | nc -u 88.88.88.90 2021 &], [0])
+NS_CHECK_EXEC([lsp], [echo a | nc -u 88.88.88.88 4040], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([lsp], [echo a | nc -u 88.88.88.89 4040], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([lsp], [echo a | nc -u 88.88.88.90 2021], [ignore], [ignore], 
[ignore])
 
 # Check hairpin traffic.
 OVS_WAIT_UNTIL([
@@ -4949,9 +4949,9 @@ NS_CHECK_EXEC([lsp], [tcpdump -l -nn -c 3 -i lsp $filter 
> lsp.pcap 2>tcpdump_er
 OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
 
 # Generate IPv6 UDP hairpin traffic.
-NS_CHECK_EXEC([lsp], [echo a | nc -u 8800::0088 4040 &], [0])
-NS_CHECK_EXEC([lsp], [echo a | nc -u 8800::0089 4040 &], [0])
-NS_CHECK_EXEC([lsp], [echo a | nc -u 8800::0090 2021 &], [0])
+NS_CHECK_EXEC([lsp], [echo a | nc -u 8800::0088 4040], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([lsp], [echo a | nc -u 8800::0089 4040], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([lsp], [echo a | nc -u 8800::0090 2021], [ignore], [ignore], 
[ignore])
 
 # Check hairpin traffic.
 OVS_WAIT_UNTIL([
@@ -9132,17 +9132,17 @@ NS_CHECK_EXEC([vm1], [nc 66.66.66.66 666 -z], [0], 
[ignore], [ignore])
 NS_CHECK_EXEC([vm2], [nc 66.66.66.66 666 -z], [0], [ignore], [ignore])
 NS_CHECK_EXEC([vm3], [nc 66.66.66.66 666 -z], [0], [ignore], [ignore])
 
-NS_CHECK_EXEC([vm1], [echo a | nc -u 66.66.66.66 777 &], [0])
-NS_CHECK_EXEC([vm2], [echo a | nc -u 66.66.66.66 777 &], [0])
-NS_CHECK_EXEC([vm3], [echo a | nc -u 66.66.66.66 777 &], [0])
+NS_CHECK_EXEC([vm1], [echo a | nc -u 66.66.66.66 777], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm2], [echo a | nc -u 66.66.66.66 777], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm3], [echo a | nc -u 66.66.66.66 777], [ignore], [ignore], 
[ignore])
 
 NS_CHECK_EXEC([vm1], [nc 66.66.66.66 888 -z], [0], [ignore], [ignore])
 NS_CHECK_EXEC([vm2], [nc 66.66.66.66 888 -z], [0], [ignore], [ignore])
 NS_CHECK_EXEC([vm3], [nc 66.66.66.66 888 -z], [0], [ignore], [ignore])
 
-NS_CHECK_EXEC([vm1], [echo a | nc -u 66.66.66.66 999 &], [0])
-NS_CHECK_EXEC([vm2], [echo a | nc -u 66.66.66.66 999 &], [0])
-NS_CHECK_EXEC([vm3], [echo a | nc -u 66.66.66.66 999 &], [0])
+NS_CHECK_EXEC([vm1], [echo a | nc -u 66.66.66.66 999], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm2], [echo a | nc -u 66.66.66.66 999], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm3], [echo a | nc -u 66.66.66.66 999], [ignore], [ignore], 
[ignore])
 
 OVS_WAIT_UNTIL([
 requests=`grep "UDP" -c vm1.pcap`
@@ -9271,17 +9271,17 @@ NS_CHECK_EXEC([vm1], [nc ::1 666 -z], [0], 
[ignore], [ignore])
 NS_CHECK_EXEC([vm2], [nc ::1 666 -z], [0], [ignore], [ignore])
 NS_CHECK_EXEC([vm3], [nc ::1 666 -z], [0], [ignore], [ignore])
 
-NS_CHECK_EXEC([vm1], [echo a | nc -u ::1 777 &], [0])
-NS_CHECK_EXEC([vm2], [echo a | nc -u ::1 777 &], [0])
-NS_CHECK_EXEC([vm3], [echo a | nc -u ::1 777 &], [0])
+NS_CHECK_EXEC([vm1], [echo a | nc -u ::1 777], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm2], [echo a | nc -u ::1 777], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm3], [echo a | nc -u ::1 777], [ignore], [ignore], 
[ignore])
 
 NS_CHECK_EXEC([vm1], [nc ::1 888 -z], [0], [ignore], [ignore])
 NS_CHECK_EXEC([vm2], [nc ::1 888 -z], [0], [ignore], [ignore])
 NS_CHECK_EXEC([vm3], [nc ::1 888 -z], [0], [ignore], [ignore])
 
-NS_CHECK_EXEC([vm1], [echo a | nc -u ::1 999 &], [0])
-NS_CHECK_EXEC([vm2], [echo a | nc -u ::1 999 &], [0])
-NS_CHECK_EXEC([vm3], [echo a | nc -u ::1 999 &], [0])
+NS_CHECK_EXEC([vm1], [echo a | nc -u ::1 999], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm2], [echo a | nc -u ::1 999], [ignore], [ignore], 
[ignore])
+NS_CHECK_EXEC([vm3], [echo a | nc -u ::1 999], [ignore], [ignore], 
[ignore])
 
 OVS_WAIT_UNTIL([
 

Re: [ovs-dev] [RFC PATCH ovn v2 8/9] ci: Use container to run the tests

2023-03-28 Thread Ales Musil
On Tue, Mar 28, 2023 at 5:31 PM Dumitru Ceara  wrote:

> On 3/15/23 07:29, Ales Musil wrote:
> > Move the ci.sh script into .ci folder
> > and remove the linux-prepare.sh as it
> > is no longer needed with all the requirements
> > installed in container.
>
> What about the OSX jobs?  Can't we wrap those in ci.sh easily?
>

I'm not sure to be honest, trying something on CI is tedious and I don't
have any
OSX available to try it out.


>
> I wonder if we can change container_exec() so that it runs on the host
> on OSX.  What do you think?


It might be possible, afterall podman should work on OSX. IMO it is not a
crucial part
for this series so we can always address that in a follow up WDYT?


>
> Regards,
> Dumitru
>
> >
> > Signed-off-by: Ales Musil 
> > ---
> >  {utilities/containers => .ci}/ci.sh |  0
> >  .ci/linux-prepare.sh| 21 ---
> >  .github/workflows/test.yml  | 54 ++---
> >  Makefile.am |  2 +-
> >  utilities/automake.mk   |  1 -
> >  5 files changed, 3 insertions(+), 75 deletions(-)
> >  rename {utilities/containers => .ci}/ci.sh (100%)
> >  delete mode 100755 .ci/linux-prepare.sh
> >
> > diff --git a/utilities/containers/ci.sh b/.ci/ci.sh
> > similarity index 100%
> > rename from utilities/containers/ci.sh
> > rename to .ci/ci.sh
> > diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh
> > deleted file mode 100755
> > index 6617d0c42..0
> > --- a/.ci/linux-prepare.sh
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -#!/bin/bash
> > -
> > -set -ev
> > -
> > -# Build and install sparse.
> > -#
> > -# Explicitly disable sparse support for llvm because some travis
> > -# environments claim to have LLVM (llvm-config exists and works) but
> > -# linking against it fails.
> > -# Disabling sqlite support because sindex build fails and we don't
> > -# really need this utility being installed.
> > -git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git
> > -cd sparse && make -j4 HAVE_LLVM= HAVE_SQLITE= install && cd ..
> > -
> > -# Installing wheel separately because it may be needed to build some
> > -# of the packages during dependency backtracking and pip >= 22.0 will
> > -# abort backtracking on build failures:
> > -# https://github.com/pypa/pip/issues/10655
> > -pip3 install --disable-pip-version-check --user wheel
> > -pip3 install --disable-pip-version-check --user \
> > --r utilities/containers/py-requirements.txt
> > diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
> > index 90dc8a6f1..d8b39da2c 100644
> > --- a/.github/workflows/test.yml
> > +++ b/.github/workflows/test.yml
> > @@ -14,11 +14,7 @@ concurrency:
> >  jobs:
> >build-linux:
> >  env:
> > -  dependencies: |
> > -automake libtool gcc bc libjemalloc2 libjemalloc-dev\
> > -libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev  \
> > -selinux-policy-dev ncat python3-scapy isc-dhcp-server
> > -  m32_dependecies: gcc-multilib
> > +  IMAGE_NAME:  ghcr.io/ovn-org/ovn-tests:ubuntu
> >ARCH:${{ matrix.cfg.arch }}
> >CC:  ${{ matrix.cfg.compiler }}
> >LIBS:${{ matrix.cfg.libs }}
> > @@ -56,7 +52,6 @@ jobs:
> >  - { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "-100" }
> >  - { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "101-200" }
> >  - { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "201-" }
> > -- { arch: x86, compiler: gcc, opts: --disable-ssl }
> >
> >  steps:
> >  - name: checkout
> > @@ -80,53 +75,8 @@ jobs:
> >  path: 'ovs'
> >  ref: 'master'
> >
> > -- name: update APT cache
> > -  run:  sudo apt update
> > -
> > -- name: remove netcat-openbsd
> > -  run:  sudo apt remove -y netcat-openbsd
> > -
> > -- name: install required dependencies
> > -  run:  sudo apt install -y ${{ env.dependencies }}
> > -
> > -- name: install libunbound libunwind
> > -  if:   matrix.cfg.arch != 'x86'
> > -  run:  sudo apt install -y libunbound-dev libunwind-dev
> > -
> > -- name: install 32-bit dependencies
> > -  if:   matrix.cfg.arch == 'x86'
> > -  run:  sudo apt install -y ${{ env.m32_dependecies }}
> > -
> > -- name: update PATH
> > -  run:  |
> > -echo "$HOME/bin">> $GITHUB_PATH
> > -echo "$HOME/.local/bin" >> $GITHUB_PATH
> > -
> > -- name: set up python
> > -  uses: actions/setup-python@v4
> > -  with:
> > -python-version: '3.x'
> > -
> > -- name: prepare
> > -  run:  ./.ci/linux-prepare.sh
> > -
> >  - name: build
> > -  run:  ./.ci/linux-build.sh
> > -
> > -- name: copy logs on failure
> > -  if: failure() || cancelled()
> > -  run: |
> > -# upload-artifact@v3 throws exceptions if it tries to upload
> socket
> > -# files and we 

Re: [ovs-dev] [PATCH ovn v2 1/9] ci: Add missing packages to the container

2023-03-28 Thread Ales Musil
On Tue, Mar 28, 2023 at 4:48 PM Dumitru Ceara  wrote:

> On 3/15/23 07:29, Ales Musil wrote:
> > Add missing jemalloc, kmod and scapy packages.
> > Install scapy through pip, because the
> > python3-scapy package has a lot of dependencies
> > that would increase the overall size of the image
> > by ~800 MB.
> >
> > Signed-off-by: Ales Musil 
> > ---
> >  utilities/containers/fedora/Dockerfile   | 2 ++
> >  utilities/containers/py-requirements.txt | 1 +
> >  2 files changed, 3 insertions(+)
> >
> > diff --git a/utilities/containers/fedora/Dockerfile
> b/utilities/containers/fedora/Dockerfile
> > index be8cd6ff2..d24bf1232 100755
> > --- a/utilities/containers/fedora/Dockerfile
> > +++ b/utilities/containers/fedora/Dockerfile
> > @@ -19,7 +19,9 @@ RUN dnf -y update \
> >  iproute \
> >  iproute-tc \
> >  iputils \
> > +jemalloc-devel \
> >  kernel-devel \
> > +kmod \
> >  libcap-ng-devel \
> >  libtool \
> >  net-tools \
>
> I think it might be better to follow the official installation procedure
> and instead of doing this do something like:
>
>
> https://github.com/ovn-org/ovn/blob/1bec9e3ddd8500793b52e11c3dc1f41ef1f48591/.ci/ovn-kubernetes/Dockerfile#L19
>
> RUN sed -e 's/@VERSION@/0.0.1/' rhel/openvswitch-fedora.spec.in >
> /tmp/ovs.spec
> RUN dnf builddep -y /tmp/ovs.spec
>
>
> https://github.com/ovn-org/ovn/blob/1bec9e3ddd8500793b52e11c3dc1f41ef1f48591/.ci/ovn-kubernetes/Dockerfile#L28
>
> RUN sed -e 's/@VERSION@/0.0.1/' rhel/ovn-fedora.spec.in > /tmp/ovn.spec
> RUN dnf builddep -y /tmp/ovn.spec
>
> There's still the risk that a new dependency is added and the new image
> hasn't been built/pushed yet.  How do you think it's best to address
> that?
>

We can trigger the image manually so if something new is added we will
trigger the job
once it is merged. I'm more concerned about those being build dependencies,
not test dependencies
which can be misleading. However if the vote is in favor of updating the
spec file with all the dependencies
for tests I'm fine with that.


>
> > diff --git a/utilities/containers/py-requirements.txt
> b/utilities/containers/py-requirements.txt
> > index d7bd21e0d..0d90765c9 100644
> > --- a/utilities/containers/py-requirements.txt
> > +++ b/utilities/containers/py-requirements.txt
> > @@ -1,5 +1,6 @@
> >  flake8
> >  hacking>=3.0
> > +scapy
> >  sphinx
> >  setuptools
> >  pyelftools
>
>
Thanks,
Ales

-- 

Ales Musil

Senior Software Engineer - OVN Core

Red Hat EMEA 

amu...@redhat.comIM: amusil

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v11 4/5] userspace: Add SRv6 tunnel support.

2023-03-28 Thread Nobuhiro MIKI
On 2023/03/28 21:31, Ilya Maximets wrote:
> On 3/28/23 08:42, Nobuhiro MIKI wrote:
>> SRv6 (Segment Routing IPv6) tunnel vport is responsible
>> for encapsulation and decapsulation the inner packets with
>> IPv6 header and an extended header called SRH
>> (Segment Routing Header). See spec in:
>>
>> https://datatracker.ietf.org/doc/html/rfc8754
>>
>> This patch implements SRv6 tunneling in userspace datapath.
>> It uses `remote_ip` and `local_ip` options as with existing
>> tunnel protocols. It also adds a dedicated `srv6_segs` option
>> to define a sequence of routers called segment list.
>>
>> Signed-off-by: Nobuhiro MIKI 
>> ---
>>  Documentation/faq/configuration.rst |  21 +
>>  Documentation/faq/releases.rst  |   1 +
>>  NEWS|   2 +
>>  include/linux/openvswitch.h |   1 +
>>  include/sparse/netinet/in.h |   1 +
>>  lib/dpif-netlink-rtnl.c |   5 ++
>>  lib/dpif-netlink.c  |   5 ++
>>  lib/netdev-native-tnl.c | 127 
>>  lib/netdev-native-tnl.h |  10 +++
>>  lib/netdev-vport.c  |  53 
>>  lib/netdev.h|   4 +
>>  lib/packets.h   |  11 +++
>>  lib/tnl-ports.c |   5 +-
>>  ofproto/ofproto-dpif-xlate.c|   3 +
>>  tests/system-kmod-macros.at |   8 ++
>>  tests/system-traffic.at | 119 ++
>>  tests/system-userspace-macros.at|   6 ++
>>  tests/tunnel.at |  56 
>>  18 files changed, 437 insertions(+), 1 deletion(-)
>>
> 
> 
> 
>> +int
>> +netdev_srv6_build_header(const struct netdev *netdev,
>> + struct ovs_action_push_tnl *data,
>> + const struct netdev_tnl_build_header_params 
>> *params)
>> +{
>> +struct netdev_vport *dev = netdev_vport_cast(netdev);
>> +struct netdev_tunnel_config *tnl_cfg;
>> +const struct in6_addr *segs;
>> +struct srv6_base_hdr *srh;
>> +struct in6_addr *s;
>> +ovs_be16 dl_type;
>> +int err = 0;
>> +int nr_segs;
>> +int i;
>> +
>> +ovs_mutex_lock(>mutex);
>> +tnl_cfg = >tnl_cfg;
>> +
>> +if (tnl_cfg->srv6_num_segs) {
>> +nr_segs = tnl_cfg->srv6_num_segs;
>> +segs = tnl_cfg->srv6_segs;
>> +} else {
>> +/*
>> + * If explicit segment list setting is omitted, tunnel destination
>> + * is considered to be the first segment list.
>> + */
>> +nr_segs = 1;
>> +segs = >flow->tunnel.ipv6_dst;
>> +}
>> +
>> +if (!ipv6_addr_equals([0], >flow->tunnel.ipv6_dst)) {
>> +err = EINVAL;
>> +goto out;
>> +}
>> +
>> +srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING);
>> +srh->rt_hdr.segments_left = nr_segs - 1;
>> +srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
>> +srh->rt_hdr.hdrlen = 2 * nr_segs;
>> +srh->last_entry = nr_segs - 1;
>> +srh->flags = 0;
>> +srh->tag = 0;
>> +
>> +dl_type = params->flow->dl_type;
>> +if (dl_type == htons(ETH_TYPE_IP)) {
>> +srh->rt_hdr.nexthdr = IPPROTO_IPIP;
>> +} else if (dl_type == htons(ETH_TYPE_IPV6)) {
>> +srh->rt_hdr.nexthdr = IPPROTO_IPV6;
>> +}
> 
> We should probably error out here if for some reason it's not an IP or IPv6.
> 

For cases other than IP or IPv6, fix to return EOPNOTSUPP, which means 
unsupported.

>> +
>> +s = ALIGNED_CAST(struct in6_addr *,
>> + (char *) srh + sizeof *srh);
>> +for (i = 0; i < nr_segs; i++) {
>> +/* Segment list is written to the header in reverse order. */
>> +memcpy(s, [nr_segs - i - 1], sizeof *s);
>> +s++;
>> +}
>> +
>> +data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
>> +data->tnl_type = OVS_VPORT_TYPE_SRV6;
>> +out:
>> +ovs_mutex_unlock(>mutex);
>> +
>> +return err;
>> +}
>> +
>> +void
>> +netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
>> +struct dp_packet *packet OVS_UNUSED,
>> +const struct ovs_action_push_tnl *data OVS_UNUSED)
> 
> 'packet' and 'data' are actually used by the function.
> 

Thanks.
I'll remove the annotation OVS_UNUSED from 'packet' and 'data'.

>> +{
>> +int ip_tot_size;
>> +
>> +netdev_tnl_push_ip_header(packet, data->header,
>> +  data->header_len, _tot_size);
>> +}
>> +
> 
> 
> 
>> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
>> index a9cf3cbee0be..15c814d6285b 100644
>> --- a/ofproto/ofproto-dpif-xlate.c
>> +++ b/ofproto/ofproto-dpif-xlate.c
>> @@ -3632,6 +3632,9 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, 
>> struct eth_addr dmac,
>>  case OVS_VPORT_TYPE_BAREUDP:
>>  nw_proto = IPPROTO_UDP;
>>  break;
>> +case OVS_VPORT_TYPE_SRV6:
>> +nw_proto = IPPROTO_IPIP;
> 
> Sorry, just noticed that 

Re: [ovs-dev] [PATCH] flow: fix sanity check for unexpected ip header length field

2023-03-28 Thread Faicker Mo
I think it's a problem which causes an unexpected megaflow. The flow maybe a 
drop megaflow to
drop the normal packet.
You can see it in the test.


The ovs kernel datapath does not check the length field when it extracts L3 
info from the skb.
The parser process is different between the kernel 
datapath(ovs_flow_key_extract) and
the userspace(miniflow_extract/ipv4_sanity_check)


No better method to handle the invalid packet,
1) if the kernel datapath drops the invalid packet, how about L2 switch?
2) if the kernel datapath parses only to L2 info like the userpace does,
the drop megaflow(no L3) will drop the valid L3 packet.


So this fix does as the kernel datapath does.






From: Flavio Leitner 
Date: 2023-03-27 23:26:27
To:  Simon Horman 
Cc:  Faicker Mo ,d...@openvswitch.org,Ilya Maximets 

Subject: Re: [ovs-dev] [PATCH] flow: fix sanity check for unexpected ip header 
length field>On Mon, Mar 27, 2023 at 03:34:52PM +0200, Simon Horman wrote:
>> On Wed, Mar 15, 2023 at 05:11:01PM +0800, Faicker Mo wrote:
>> > Derivation cases of CVE-2020-35498:
>> > 1. invalid ipv4 header total-length field
>> > 2. invalid ipv6 header payload-length field
>> > These may cause unwanted flow to send to datapath.
>> > 
>> > 
>> > Signed-off-by: Faicker Mo 
>> 
>> I think the immediate question here is how to correctly handle invalid
>> packets which claim to have a total length that is greater than the length
>> received (size) by OVS.
>
>OVS strives to forward all packets but at the same time it 
>can't rely on data from non-conforming packets.
>
>You can see with checksum that when OVS changes a packet 
>it recalculates the checksum in a way that it doesn't 
>change the original state (good/bad).
>
>> It doesn't seem to me that truncating the total length to size,
>> and thus pretending the packets are valid in this regard,
>> is the right approach.
>
>+1
>
>> Is there a bigger problem in that any packets that fail the sanity check
>> are problematic with regards to megaflow creation? If so, I think we should
>> aim for a more comprehensive solution.
>
>+1
>
>fbl
>
>
>> 
>> > ---
>> >  lib/flow.c  | 11 +--
>> >  tests/classifier.at | 42 ++
>> >  tests/ofp-print.at  |  2 +-
>> >  3 files changed, 48 insertions(+), 7 deletions(-)
>> > 
>> > 
>> > diff --git a/lib/flow.c b/lib/flow.c
>> > index c3a3aa3ce..d96d02213 100644
>> > --- a/lib/flow.c
>> > +++ b/lib/flow.c
>> > @@ -662,9 +662,8 @@ ipv4_sanity_check(const struct ip_header *nh, size_t 
>> > size,
>> >  return false;
>> >  }
>> >  
>> > -tot_len = ntohs(nh->ip_tot_len);
>> > -if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len ||
>> > -size - tot_len > UINT16_MAX)) {
>> > +tot_len = MIN(size, ntohs(nh->ip_tot_len));
>> > +if (OVS_UNLIKELY(ip_len > tot_len || size - tot_len > UINT16_MAX)) {
>> >  COVERAGE_INC(miniflow_extract_ipv4_pkt_len_error);
>> >  return false;
>> >  }
>> > @@ -700,7 +699,7 @@ ipv6_sanity_check(const struct ovs_16aligned_ip6_hdr 
>> > *nh, size_t size)
>> >  return false;
>> >  }
>> >  
>> > -plen = ntohs(nh->ip6_plen);
>> > +plen = MIN(size - sizeof *nh, ntohs(nh->ip6_plen));
>> >  if (OVS_UNLIKELY(plen + IPV6_HEADER_LEN > size)) {
>> >  COVERAGE_INC(miniflow_extract_ipv6_pkt_len_error);
>> >  return false;
>> > @@ -920,7 +919,7 @@ miniflow_extract(struct dp_packet *packet, struct 
>> > miniflow *dst)
>> >  }
>> >  data_pull(, , sizeof *nh);
>> >  
>> > -plen = ntohs(nh->ip6_plen);
>> > +plen = MIN(size, ntohs(nh->ip6_plen));
>> >  dp_packet_set_l2_pad_size(packet, size - plen);
>> >  size = plen;   /* Never pull padding. */
>> >  
>> > @@ -1197,7 +1196,7 @@ parse_tcp_flags(struct dp_packet *packet,
>> >  }
>> >  data_pull(, , sizeof *nh);
>> >  
>> > -plen = ntohs(nh->ip6_plen); /* Never pull padding. */
>> > +plen = MIN(size, ntohs(nh->ip6_plen)); /* Never pull padding. */
>> >  dp_packet_set_l2_pad_size(packet, size - plen);
>> >  size = plen;
>> >  const struct ovs_16aligned_ip6_frag *frag_hdr;
>> > diff --git a/tests/classifier.at b/tests/classifier.at
>> > index de2705653..1a1615bb5 100644
>> > --- a/tests/classifier.at
>> > +++ b/tests/classifier.at
>> > @@ -418,6 +418,7 @@ ovs-ofctl: "conjunction" actions may be used along 
>> > with "note" but not any other
>> >  OVS_VSWITCHD_STOP
>> >  AT_CLEANUP
>> >  
>> > +AT_BANNER([flow classifier abnormal packet])
>> >  # Flow classifier a packet with excess of padding.
>> >  AT_SETUP([flow classifier - packet with extra padding])
>> >  OVS_VSWITCHD_START
>> > @@ -453,3 +454,44 @@ Datapath actions: 2
>> >  ])
>> >  OVS_VSWITCHD_STOP
>> >  AT_CLEANUP
>> > +
>> > +dnl Flow classifier a packet with invalid total-length field of ipv4 
>> > header
>> > +AT_SETUP([flow classifier - packet with invalid total-length field of 
>> > ipv4 

Re: [ovs-dev] [PATCH v24 3/8] netdev-offload-tc: Introduce group ID management API

2023-03-28 Thread Chris Mi via dev

On 3/23/2023 8:07 PM, Eelco Chaudron wrote:


On 23 Mar 2023, at 12:24, Chris Mi wrote:


On 3/23/2023 5:47 PM, Eelco Chaudron wrote:

On 23 Mar 2023, at 10:28, Chris Mi wrote:


On 3/22/2023 6:45 PM, Eelco Chaudron wrote:

On 22 Mar 2023, at 7:15, Chris Mi wrote:


On 3/20/2023 6:04 PM, Eelco Chaudron wrote:

On 20 Mar 2023, at 6:44, Chris Mi wrote:


On 3/16/2023 5:09 PM, Eelco Chaudron wrote:

On 1 Mar 2023, at 8:22, Chris Mi wrote:


When offloading sample action to TC, userspace creates a unique ID
to map sFlow action and tunnel info and passes this ID to kernel
instead of the sFlow info. Psample will send this ID and sampled
packet to userspace. Using the ID, userspace can recover the sFlow
info and send sampled packet to the right sFlow monitoring host.

See some comments inline below.

//Eelco


Signed-off-by: Chris Mi
Reviewed-by: Roi Dayan
---
  lib/netdev-offload-tc.c | 233 +++-
  lib/tc.h|   1 +
  2 files changed, 232 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 4fb9d9f21..4214337d8 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -41,6 +41,7 @@
  #include "unaligned.h"
  #include "util.h"
  #include "dpif-provider.h"
+#include "cmap.h"

  VLOG_DEFINE_THIS_MODULE(netdev_offload_tc);

@@ -103,6 +104,226 @@ static void parse_tc_flower_to_stats(struct tc_flower 
*flower,
  static int get_ufid_adjust_stats(const ovs_u128 *ufid,
   struct dpif_flow_stats *stats);

+/* When offloading sample action to TC, userspace creates a unique ID
+ * to map sFlow action and tunnel info and passes this ID to kernel
+ * instead of the sFlow info. Psample will send this ID and sampled
+ * packet to userspace. Using the ID, userspace can recover the sFlow
+ * info and send sampled packet to the right sFlow monitoring host.
+ */
+struct offload_sflow {

Should we maybe already give this a more general name, like offload_sample?
This as you need the same structure later when you add support of offload sent 
to a controller?

OK.

We might need an offload_type field.

OK.

+struct nlattr *action;   /* SFlow action. Used in flow_get. */
+struct nlattr *userdata; /* Struct user_action_cookie. */
+struct nlattr *actions;  /* All actions to get output tunnel. */

You should not need actions at all, see comments later. These are the next 
actions, which are not of interest to sflow offload.

As the comment describes, we need it to get the output tunnel. Otherwise, we'll 
lose the following info using sflowtool after offloading:

extendedType out_VNI
out_VNI 4
flowBlock_tag 0:1023
flowSampleType tunnel_ipv4_out_IPV4
tunnel_ipv4_out_sampledPacketSize 0
tunnel_ipv4_out_IPSize 0
tunnel_ipv4_out_srcIP 0.0.0.0
tunnel_ipv4_out_dstIP 192.168.1.66
tunnel_ipv4_out_IPProtocol 17
tunnel_ipv4_out_IPTOS 0
tunnel_ipv4_out_UDPSrcPort 0
tunnel_ipv4_out_UDPDstPort 46354

I don't think we can get the output tunnel info without mapping 'actions'.

I’m confused as from the code it looks like actions are the outer actions, and 
your actions should come from the inner actions, i.e., they should be part of 
action.

In my understanding, 'actions' is all actions, for example:
"actions:userspace(pid=4294967295,sFlow(vid=0,pcp=0,output=935),actions),set(tunnel(tun_id=0x4,dst=192.168.1.66,ttl=64,tp_dst=4789,flags(df|key))),vxlan_sys_4789"
Not sure what do you mean by outer actions and inner actions.

actions: [userspace(pid=4294967295,sFlow(vid=0,pcp=0,output=935),actions)], 
[set(tunnel(tun_id=0x4,dst=192.168.1.66,ttl=64,tp_dst=4789,flags(df|key))),vxlan_sys_4789]
+  
--
|  
\ These are the remaining (outer) actions and the sflow implementation should 
not care about this at all.
\ These are the sflow actions and this is all what sflow should 
care about!


So sflow should only use the sFlow actions, and the remaining (outer) actions 
should not be touched by sFlow these should be implemented/using the existing 
TC offloads.
If you supply these actions to the upcall they are also executed (again) in 
userspace, which they should not.


Maybe I’m not understanding this correctly, and as this is a more complex case, 
we should have a test case fore it.

If you can share the test case before the next revision of the patchset, I can 
revisit it to safe another version…

Our QA reported this bug using their test. It's written in python. I'm afraid 
it doesn't help even if I can share it.
They said that sflowtool shows different results between hw-offload="true" and 
non-offload.

I read the code and found this:
dpif_sflow_received()
{
...
       /* Output tunnel. */
       if (sflow_actions
       

Re: [ovs-dev] [RFC ovn 0/3] rework OVN QoS implementation

2023-03-28 Thread Ihar Hrachyshka
Hi Lorenzo et al.

I took a look at the series and have a number of concerns. Note that I
looked at the series in context of OpenStack and the way Neutron uses
qos_ options. Some points below are not directly relevant to the
series but I think they add important context.

---

First, I will agree with the direction of the patch - it is beneficial
to switch from TC to using OVS queues to configure LSP level qos_*
settings.

But, see below.

1. The 2/3 patch explicitly claims that the qos_min_rate option is for
localnet ports only: "QoS is only supported on localnet port for the
moment". Actually, it's vice versa: the option is documented for VIF
(regular) ports
only:https://github.com/ovn-org/ovn/blob/1bec9e3ddd8500793b52e11c3dc1f41ef1f48591/ovn-nb.xml#L1096
To compare, the localnet section of the document doesn't list this
option: 
https://github.com/ovn-org/ovn/blob/1bec9e3ddd8500793b52e11c3dc1f41ef1f48591/ovn-nb.xml#L1034

This assumption about the application of the options to localnet
ports, and not to other port types, seems to creep into the 1/3 patch
of the series too.

I tried to understand where the misunderstanding comes from. Perhaps
it's because the "egress qos" system test was added to cover a
supposed I-P regression where qos is not applied to localnet ports.
Then later, when adding qos_min_rate option support, I patched the
existing test case, without realizing that it sets qos rules to
localnet port, not regular VIF port (as per NB documentation). We
should probably fix the test case (or add a new one for regular ports
if we decide that localnet qos is a valid scenario).

Or perhaps it's because we call consider_localnet_lport with qos_map
passed, so it also configures qos queues for these ports.

Regardless, it's clear that regular (VIF) ports should support qos_
options, so any changes to disable qos for them is invalid. Actually,
OpenStack Neutron already uses qos_min_rate option to configure its
minimal bandwidth guarantees, and these guarantees are set for regular
ports. I don't know if localnet ports should also support qos rules
(if so, documentation should be updated to include the options in
localnet section).

2. AFAIU there's some bug in qos code where policies that are not set
for a port are being applied, when there are multiple localnet ports.
(At least that's what I figured from a discussion with Rodolfo;
Rodolfo, could you please describe the exact scenario that you
experienced?) But it's not related to issues with qos_ options set to
localnet ports (because Neutron doesn't set them for localnets.)

3. Side note: while reviewing the series, I noticed that
port_has_qos_params function doesn't check if qos_min_rate option is
set. This is not a problem of your series, but should be fixed.

4. Side note about OpenStack: AFAIU for a physical interface to be
included in qdisc setup, it should be marked with
ovn-egress-iface=true. But I couldn't find code in Neutron that would
configure it. (Nor anything relevant on Github.) Rodolfo, could you
please clarify if any component sets the option up?

---

For reference, here is how qos works right now:
- qos_ options are set for regular (VIF) port.
- northd allocates qdisc_queue_id for each qos-enabled (regular) port.
- ovn-controller sets metric for the ID, plus creates qdisc queues on
physical interfaces that are related to the ports (through
corresponding localnet ports on the same switch).

(Let me know if the above doesn't match your vision of how things work now.)

Thanks,
Ihar

On Mon, Mar 20, 2023 at 2:33 PM Mark Michelson  wrote:
>
> Hi Lorenzo,
>
> Since this is an RFC series, I did not do an in-depth review of the
> patches. I just took a quick look.
>
> My only concern is that patch 1 breaks existing configurations for QoS,
> since the ovs-port parameter is now required. Is there any way to
> preserve existing behavior [1] for deployments that do not have this
> option set?
>
> Other than that, at a high level it seems like a good series.
>
> Thanks,
> Mark Michelson
>
>
> [1] By "existing behavior" I don't mean to configure tc directly. I just
> mean that QoS will end up set up with the same behavior as it did before
> this patch series.
>
> On 3/17/23 16:00, Lorenzo Bianconi wrote:
> > Rework OVN QoS implementation in order to configure it through OVS QoS
> > table instead of running tc command directly bypassing OVS.
> >
> > Lorenzo Bianconi (3):
> >controller: fix interface qos aliasing
> >controller: configure qos through ovs qos table and do not run tc
> >  directly
> >controller: get rid of egress_ifaces
> >
> >   controller/binding.c| 374 
> >   controller/binding.h|   1 -
> >   controller/ovn-controller.c |  15 +-
> >   northd/northd.c |   3 +-
> >   northd/ovn-northd.8.xml |   6 -
> >   ovn-nb.xml  |   5 +
> >   ovn-sb.xml  |   5 +
> >   tests/ovn-performance.at|   5 -
> >   tests/system-ovn.at   

Re: [ovs-dev] [PATCH ovn v5] northd: Make the use of common zone in NAT configurable

2023-03-28 Thread Numan Siddique
On Tue, Mar 28, 2023 at 1:11 AM Ales Musil  wrote:
>
> On Mon, Mar 27, 2023 at 10:01 PM Numan Siddique  wrote:
>
> > "
> >
> > On Mon, Mar 27, 2023 at 2:34 AM Ales Musil  wrote:
> > >
> > > There are essentially three problems with the current
> > > combination of DGP + SNAT + LB:
> > >
> > > 1) The first packet is being SNATed in common zone due
> > > to a problem with pinctrl not preserving ct_mark/ct_label.
> > > The commit would create a SNAT entry within the same with DNAT
> > > entry.
> > >
> > > 2) The unSNAT for reply always happened in common zone because of
> > > the loopback check which would be triggered only when we loop
> > > the packet through the LR. Now there are two possibilities how
> > > the reply packet would be handled:
> > >
> > > a) If the entry for SNAT in common zone did not time out yet, the
> > > packet would be passed through unSNAT in common zone which would
> > > be fine and continue on. However, the unDNAT wouldn't work due to
> > > the limitation of CT not capable of doing unSNAT/unDNAT on the same
> > > packet twice in the same zone. So the reply would arrive to
> > > the original interface, but with wrong source address.
> > >
> > > b) If the entry for SNAT timed out it would loop and do unSNAT correctly
> > > in separate zone and then also unDNAT. This is not possible anymore with
> > > a recent change 8c341b9d (northd: Drop packets destined to router owned
> > NAT IP for DGP).
> > > The reply would be dropped before looping after that change co the
> > traffic
> > > would never arrive to the original interface.
> > >
> > > 3) The unDNAT was happening only if the DGP was outport meaning
> > > the reply traffic was routed out, but in the opposite case
> > > the unDNAT was happening only because of the looping which made
> > > outport=inport. That's why it worked before introduction of explicit
> > drop.
> > >
> > > In order to fix all those issues do two changes:
> > >
> > > 1) Include inport in the unDNAT match, so that we have both
> > > routing directions covered e.g. (inport == "dgp_port" || outport ==
> > "dpg_port").
> > >
> > > 2) Always use the separate zone for SNAT and DNAT. As the common
> > > zone was needed for HWOL make the common zone optional with
> > > configuration option called "use_common_zone". This option is
> > > by default "false" and can be specified per LR. Use of separate
> > > zones also eliminates the need for the flag propagation
> > > in "lr_out_chk_dnat_local" stage, removing the match on ct_mark/ct_label.
> > >
> > > Reported-at: https://bugzilla.redhat.com/2161281
> > > Acked-by: Mark Michelson 
> > > Signed-off-by: Ales Musil 
> > > ---
> > > v2: Fix flaky system test.
> > > v3: Rebase on top of current main.
> > > v4: Rebase on top of current main.
> > > Move the system test to system-ovn-kmod
> > > to unblock the failures with userspace.
> > > v5: Rebase on top of current main.
> >
> > Hi Ales,
> >
>
> > With this patch applied, would we still see the issue if
> > "use_common_zone" is configured on a distributed router ?
> >
> > If not I'd suggest making the "use_common_zone" enabled by default for
> > the following reasons
> >  - The original issue was reported by openstack neutron and a load
> > balancer would be configured only when octavia ovn provider driver is
> > used.  I think this is not a common scenario
> >  - Using a common zone reduces recirculations and has some
> > performance benefits.
> > -  use common zone feature was added in v21.12 and with this patch
> > we will be changing the behavior and HWOL use cases may break
> > unexpectedly
> > -  openstack neutron can add the code to disable "use_common_zone"
> > when a logical router is configured with both NAT and load balancer.
> >
> >
> > Thanks
> > Numan
> >
>
>
> Hi Numan,
>
> unfortunately the described scenario is still broken even if we flip the
> "use_common_zone",
> it's there for backwards compatibility. We can keep it broken by default
> and document that,
> however I don't feel like this is the way to go.

Please see the reply from @Daniel Alvarez Sanchez here -
https://mail.openvswitch.org/pipermail/ovs-dev/2023-March/403184.html

I agree with his suggestion.   Lets disable use_common_zone by default
and make it configurable globally so that CMS can enable use of the
common zones
if required without any code changes to neutron.

Thanks
Numan

>
> Thanks,
> Ales
>
>
> >
> > > ---
> > >  northd/northd.c  | 510 +--
> > >  northd/ovn-northd.8.xml  |  90 +--
> > >  ovn-nb.xml   |  10 +
> > >  tests/ovn-northd.at  | 215 -
> > >  tests/ovn.at |   3 +
> > >  tests/system-ovn-kmod.at | 166 +
> > >  tests/system-ovn.at  | 117 -
> > >  7 files changed, 611 insertions(+), 500 deletions(-)
> > >
> > > diff --git a/northd/northd.c b/northd/northd.c
> > > index 339286be3..d3e6a7b2d 100644
> > > --- a/northd/northd.c
> > > +++ 

Re: [ovs-dev] [PATCH] seq: Make read of the current value atomic

2023-03-28 Thread Mike Pattrick
On Mon, Mar 27, 2023 at 7:25 AM Eelco Chaudron  wrote:
>
> Make the read of the current seq->value atomic, i.e., not needing to
> acquire the global mutex when reading it. On 64-bit systems, this
> incurs no overhead, and it will avoid the mutex and potentially
> a system call.
>
> For incrementing the value followed by waking up the threads, we are
> still taking the mutex, so the current behavior is not changing. The
> seq_read() behavior is already defined as, "Returns seq's current
> sequence number (which could change immediately)". So the change
> should not impact the current behavior.
>
> Signed-off-by: Eelco Chaudron 
> ---
>  lib/ovs-rcu.c |2 +-
>  lib/seq.c |   37 -
>  lib/seq.h |1 -
>  3 files changed, 17 insertions(+), 23 deletions(-)
>
> diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
> index 946aa04d1..9e07d9bab 100644
> --- a/lib/ovs-rcu.c
> +++ b/lib/ovs-rcu.c
> @@ -170,7 +170,7 @@ ovsrcu_try_quiesce(void)
>  ovs_assert(!single_threaded());
>  perthread = ovsrcu_perthread_get();
>  if (!seq_try_lock()) {
> -perthread->seqno = seq_read_protected(global_seqno);
> +perthread->seqno = seq_read(global_seqno);
>  if (perthread->cbset) {
>  ovsrcu_flush_cbset__(perthread, true);
>  }
> diff --git a/lib/seq.c b/lib/seq.c
> index 99e5bf8bd..22646f3f8 100644
> --- a/lib/seq.c
> +++ b/lib/seq.c
> @@ -32,7 +32,7 @@ COVERAGE_DEFINE(seq_change);
>
>  /* A sequence number object. */
>  struct seq {
> -uint64_t value OVS_GUARDED;
> +atomic_uint64_t value;
>  struct hmap waiters OVS_GUARDED; /* Contains 'struct seq_waiter's. */
>  };
>
> @@ -72,6 +72,7 @@ static void seq_wake_waiters(struct seq *) 
> OVS_REQUIRES(seq_mutex);
>  struct seq * OVS_EXCLUDED(seq_mutex)
>  seq_create(void)
>  {
> +uint64_t seq_value;
>  struct seq *seq;
>
>  seq_init();
> @@ -81,7 +82,8 @@ seq_create(void)
>  COVERAGE_INC(seq_change);
>
>  ovs_mutex_lock(_mutex);

I think it's also possible to get rid of the mutex here. Operations
like modifying a bridge or interface can result in calling seq_create
dozens of times, which could potentially lead to contention in cases
when a lot of interfaces are constantly added or removed. I'm mainly
thinking about kubernetes here.

Other than that, I found this patch to reduce locking on seq_mutex
pretty significantly with a userspace workload:

Before: 468727.2/sec
After: 229265.8/sec

The rest of this patch looks good to me, but what do you think about:

diff --git a/lib/seq.c b/lib/seq.c
index 22646f3f8..09fdccce5 100644
--- a/lib/seq.c
+++ b/lib/seq.c
@@ -57,7 +57,7 @@ struct seq_thread {

 static struct ovs_mutex seq_mutex = OVS_MUTEX_INITIALIZER;

-static uint64_t seq_next OVS_GUARDED_BY(seq_mutex) = 1;
+static atomic_uint64_t seq_next = 1;

 static pthread_key_t seq_thread_key;

@@ -81,11 +81,9 @@ seq_create(void)

 COVERAGE_INC(seq_change);

-ovs_mutex_lock(_mutex);
-seq_value = seq_next++;
+seq_value = atomic_count_inc64(_next);
 atomic_store_relaxed(>value, seq_value);
 hmap_init(>waiters);
-ovs_mutex_unlock(_mutex);

 return seq;
 }
@@ -128,7 +126,7 @@ void
 seq_change_protected(struct seq *seq)
 OVS_REQUIRES(seq_mutex)
 {
-uint64_t seq_value = seq_next++;
+uint64_t seq_value = atomic_count_inc64(_next);

 COVERAGE_INC(seq_change);

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5] lib, ovsdb, ovs-vsctl, vtep-ctl: Fix multiple Coverity defects

2023-03-28 Thread James R T
On Mon, Mar 27, 2023 at 5:03 PM Eelco Chaudron  wrote:
>
> I did not review this, but I noticed you do null checks for memset() and 
> memcpy(). But there are functions for this in OVS like nullable_memset() and 
> nullable_memcpy(), maybe some of the changes might benefit from using this.
>
> In addition, there is also a nullable_string_is_equal() which might be useful 
> in some cases.

Ah okay sure, thank you for the suggestions Eelco. I was not aware of
the `nullable_` functions. I will look into them when improving this
patch.

On Tue, Mar 28, 2023 at 6:36 PM Ilya Maximets  wrote:
>
> I'd also say that the vast majority of changes here should
> be replaced with assertions, because they are checking for
> impossible conditions.  For example, there should be no way
> the name and tables are not set after successful parser run
> in ovsdb_schema_from_json(), because they are not optional.
> Explicit handling of impossible cases is confusing for readers.
> Use of assertions will also reduce the patch size significantly.

Got it, thank you for the suggestions Ilya. I will incorporate them
into the next patch version as soon as I find time to work on it.

Best regards,
James Raphael Tiovalen
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] northd: Make the use of common zone in NAT configurable

2023-03-28 Thread Daniel Alvarez Sanchez
Thanks a lot Ales!

On Tue, Feb 7, 2023 at 9:08 AM Ales Musil  wrote:

> There are essentially three problems with the current
> combination of DGP + SNAT + LB:
>
> 1) The first packet is being SNATed in common zone due
> to a problem with pinctrl not preserving ct_mark/ct_label.
> The commit would create a SNAT entry within the same with DNAT
> entry.
>
> 2) The unSNAT for reply always happened in common zone because of
> the loopback check which would be triggered only when we loop
> the packet through the LR. Now there are two possibilities how
> the reply packet would be handled:
>
> a) If the entry for SNAT in common zone did not time out yet, the
> packet would be passed through unSNAT in common zone which would
> be fine and continue on. However, the unDNAT wouldn't work due to
> the limitation of CT not capable of doing unSNAT/unDNAT on the same
> packet twice in the same zone. So the reply would arrive to
> the original interface, but with wrong source address.
>
> b) If the entry for SNAT timed out it would loop and do unSNAT correctly
> in separate zone and then also unDNAT. This is not possible anymore with
> a recent change 8c341b9d (northd: Drop packets destined to router owned
> NAT IP for DGP).
> The reply would be dropped before looping after that change co the traffic
> would never arrive to the original interface.
>
> 3) The unDNAT was happening only if the DGP was outport meaning
> the reply traffic was routed out, but in the opposite case
> the unDNAT was happening only because of the looping which made
> outport=inport. That's why it worked before introduction of explicit drop.
>
> In order to fix all those issues do two changes:
>
> 1) Include inport in the unDNAT match, so that we have both
> routing directions covered e.g. (inport == "dgp_port" || outport ==
> "dpg_port").
>
> 2) Always use the separate zone for SNAT and DNAT. As the common
> zone was needed for HWOL make the common zone optional with
> configuration option called "use_common_zone". This option is
> by default "false" and can be specified per LR. Use of separate
> zones also eliminates the need for the flag propagation
> in "lr_out_chk_dnat_local" stage, removing the match on ct_mark/ct_label.
>

I can't think of a scenario - at least in OpenStack - where we would have
LRs with different values of this setting.
Would it be easier to have a global setting instead?
For the specific case of OpenStack it would be also way easier as it is a
setting we can turn on/off (likely at deployment time) and not have Neutron
having to take care of updating each LR; ie. we can hide this from Neutron.

What do you think?
Thanks a lot!
daniel

>
> Reported-at: https://bugzilla.redhat.com/2161281
> Signed-off-by: Ales Musil 
> ---
>  northd/northd.c | 509 +---
>  northd/ovn-northd.8.xml |  90 +--
>  ovn-nb.xml  |  10 +
>  tests/ovn-northd.at | 217 -
>  tests/ovn.at|   3 +
>  tests/system-ovn.at |  76 +-
>  6 files changed, 509 insertions(+), 396 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 77e105b86..902f21d77 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10519,7 +10519,13 @@ build_distr_lrouter_nat_flows_for_lb(struct
> lrouter_nat_lb_flows_ctx *ctx,
>   enum lrouter_nat_lb_flow_type type,
>   struct ovn_datapath *od)
>  {
> -char *gw_action = od->is_gw_router ? "ct_dnat;" : "ct_dnat_in_czone;";
> +bool use_common_zone =
> +smap_get_bool(>nbr->options, "use_common_zone", false);
> +char *gw_action = !od->is_gw_router && use_common_zone
> +  ? "ct_dnat_in_czone;"
> +  : "ct_dnat;";
> +struct ovn_port *dgp = od->l3dgw_ports[0];
> +
>  /* Store the match lengths, so we can reuse the ds buffer. */
>  size_t new_match_len = ctx->new_match->length;
>  size_t est_match_len = ctx->est_match->length;
> @@ -10556,10 +10562,9 @@ build_distr_lrouter_nat_flows_for_lb(struct
> lrouter_nat_lb_flows_ctx *ctx,
>  char *action = type == LROUTER_NAT_LB_FLOW_NORMAL
> ? gw_action : ctx->est[type].action;
>
> -ds_put_format(ctx->undnat_match,
> -  ") && outport == %s && is_chassis_resident(%s)",
> -  od->l3dgw_ports[0]->json_key,
> -  od->l3dgw_ports[0]->cr_port->json_key);
> +ds_put_format(ctx->undnat_match, ") && (inport == %s || outport ==
> %s)"
> +  " && is_chassis_resident(%s)", dgp->json_key,
> dgp->json_key,
> +  dgp->cr_port->json_key);
>  ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_OUT_UNDNAT, 120,
>  ds_cstr(ctx->undnat_match), action,
>  >lb->nlb->header_);
> @@ -11025,13 +11030,8 @@ copy_ra_to_sb(struct ovn_port *op, const char
> *address_mode)
>  static inline bool
>  

Re: [ovs-dev] [RFC PATCH ovn v2 8/9] ci: Use container to run the tests

2023-03-28 Thread Dumitru Ceara
On 3/15/23 07:29, Ales Musil wrote:
> Move the ci.sh script into .ci folder
> and remove the linux-prepare.sh as it
> is no longer needed with all the requirements
> installed in container.

What about the OSX jobs?  Can't we wrap those in ci.sh easily?

I wonder if we can change container_exec() so that it runs on the host
on OSX.  What do you think?

Regards,
Dumitru

> 
> Signed-off-by: Ales Musil 
> ---
>  {utilities/containers => .ci}/ci.sh |  0
>  .ci/linux-prepare.sh| 21 ---
>  .github/workflows/test.yml  | 54 ++---
>  Makefile.am |  2 +-
>  utilities/automake.mk   |  1 -
>  5 files changed, 3 insertions(+), 75 deletions(-)
>  rename {utilities/containers => .ci}/ci.sh (100%)
>  delete mode 100755 .ci/linux-prepare.sh
> 
> diff --git a/utilities/containers/ci.sh b/.ci/ci.sh
> similarity index 100%
> rename from utilities/containers/ci.sh
> rename to .ci/ci.sh
> diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh
> deleted file mode 100755
> index 6617d0c42..0
> --- a/.ci/linux-prepare.sh
> +++ /dev/null
> @@ -1,21 +0,0 @@
> -#!/bin/bash
> -
> -set -ev
> -
> -# Build and install sparse.
> -#
> -# Explicitly disable sparse support for llvm because some travis
> -# environments claim to have LLVM (llvm-config exists and works) but
> -# linking against it fails.
> -# Disabling sqlite support because sindex build fails and we don't
> -# really need this utility being installed.
> -git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git
> -cd sparse && make -j4 HAVE_LLVM= HAVE_SQLITE= install && cd ..
> -
> -# Installing wheel separately because it may be needed to build some
> -# of the packages during dependency backtracking and pip >= 22.0 will
> -# abort backtracking on build failures:
> -# https://github.com/pypa/pip/issues/10655
> -pip3 install --disable-pip-version-check --user wheel
> -pip3 install --disable-pip-version-check --user \
> --r utilities/containers/py-requirements.txt
> diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
> index 90dc8a6f1..d8b39da2c 100644
> --- a/.github/workflows/test.yml
> +++ b/.github/workflows/test.yml
> @@ -14,11 +14,7 @@ concurrency:
>  jobs:
>build-linux:
>  env:
> -  dependencies: |
> -automake libtool gcc bc libjemalloc2 libjemalloc-dev\
> -libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev  \
> -selinux-policy-dev ncat python3-scapy isc-dhcp-server
> -  m32_dependecies: gcc-multilib
> +  IMAGE_NAME:  ghcr.io/ovn-org/ovn-tests:ubuntu
>ARCH:${{ matrix.cfg.arch }}
>CC:  ${{ matrix.cfg.compiler }}
>LIBS:${{ matrix.cfg.libs }}
> @@ -56,7 +52,6 @@ jobs:
>  - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
> test_range: "-100" }
>  - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
> test_range: "101-200" }
>  - { compiler: clang, testsuite: system-test, sanitizers: sanitizers, 
> test_range: "201-" }
> -- { arch: x86, compiler: gcc, opts: --disable-ssl }
>  
>  steps:
>  - name: checkout
> @@ -80,53 +75,8 @@ jobs:
>  path: 'ovs'
>  ref: 'master'
>  
> -- name: update APT cache
> -  run:  sudo apt update
> -
> -- name: remove netcat-openbsd
> -  run:  sudo apt remove -y netcat-openbsd
> -
> -- name: install required dependencies
> -  run:  sudo apt install -y ${{ env.dependencies }}
> -
> -- name: install libunbound libunwind
> -  if:   matrix.cfg.arch != 'x86'
> -  run:  sudo apt install -y libunbound-dev libunwind-dev
> -
> -- name: install 32-bit dependencies
> -  if:   matrix.cfg.arch == 'x86'
> -  run:  sudo apt install -y ${{ env.m32_dependecies }}
> -
> -- name: update PATH
> -  run:  |
> -echo "$HOME/bin">> $GITHUB_PATH
> -echo "$HOME/.local/bin" >> $GITHUB_PATH
> -
> -- name: set up python
> -  uses: actions/setup-python@v4
> -  with:
> -python-version: '3.x'
> -
> -- name: prepare
> -  run:  ./.ci/linux-prepare.sh
> -
>  - name: build
> -  run:  ./.ci/linux-build.sh
> -
> -- name: copy logs on failure
> -  if: failure() || cancelled()
> -  run: |
> -# upload-artifact@v3 throws exceptions if it tries to upload socket
> -# files and we could have some socket files in testsuite.dir.
> -# Also, upload-artifact@v3 doesn't work well enough with wildcards.
> -# So, we're just archiving everything here to avoid any issues.
> -mkdir logs
> -cp config.log ./logs/
> -cp -r ./*/_build/sub/tests/testsuite.* ./logs/ || true
> -# System tests are run as root, need to adjust permissions.
> -sudo chmod -R +r ./tests/system-kmod-testsuite.* || true
> -cp -r ./tests/system-kmod-testsuite.* ./logs/ || true
> -tar -czvf logs.tgz logs/
> +  

[ovs-dev] [PATCH v2] netdev-tc-offloads: Fix misaligned 8 byte read.

2023-03-28 Thread Mike Pattrick
UB Sanitizer report:

lib/netdev-offload-tc.c:1276:19: runtime error: load of misaligned
address 0x7f74e801976c for type 'union ovs_u128', which requires 8 byte
alignment

#0 in netdev_tc_flow_dump_next lib/netdev-offload-tc.c:1276
#1 in netdev_flow_dump_next lib/netdev-offload.c:303
#2 in dpif_netlink_flow_dump_next lib/dpif-netlink.c:1921
[...]

Signed-off-by: Mike Pattrick 
---
Since v1:
 - Changed memcpy to get_32aligned_u128
 - Checked full size of cookie length
---
 lib/netdev-offload-tc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 247c1ff8b..4721f0160 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1282,8 +1282,8 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
 continue;
 }
 
-if (flower.act_cookie.len) {
-*ufid = *((ovs_u128 *) flower.act_cookie.data);
+if (flower.act_cookie.len >= sizeof *ufid) {
+*ufid = get_32aligned_u128(flower.act_cookie.data);
 } else if (!find_ufid(netdev, , ufid)) {
 continue;
 }
-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 3/9] ci: Add argument for the py-requirements path

2023-03-28 Thread Dumitru Ceara
On 3/15/23 07:29, Ales Musil wrote:
> In order to build the container from different
> working directory we need to properly specify the path
> to py-requirements.txt. Add ARG into the Dockerfile
> and into Makefile which allows us to specify the path.
> 
> Signed-off-by: Ales Musil 
> ---
>  build-aux/initial-tab-whitelist| 1 +
>  utilities/containers/Makefile  | 5 -
>  utilities/containers/fedora/Dockerfile | 4 +++-
>  3 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/build-aux/initial-tab-whitelist b/build-aux/initial-tab-whitelist
> index b2f5a0791..8ad43d616 100644
> --- a/build-aux/initial-tab-whitelist
> +++ b/build-aux/initial-tab-whitelist
> @@ -9,3 +9,4 @@
>  ^debian/rules.modules$
>  ^debian/rules$
>  ^\.gitmodules$
> +^utilities/containers/Makefile
> diff --git a/utilities/containers/Makefile b/utilities/containers/Makefile
> index 8b39e3493..ef1d42aca 100644
> --- a/utilities/containers/Makefile
> +++ b/utilities/containers/Makefile
> @@ -1,7 +1,10 @@
>  CONTAINER_CMD ?= podman
>  DISTRO ?= fedora
>  IMAGE_NAME ?= "ovn-org/ovn-tests"
> +CONTAINERS_PATH ?= "."
>  
>  .PHONY: build
>  
> -build: ;$(CONTAINER_CMD) build --no-cache --rm -t $(IMAGE_NAME) -f 
> $(DISTRO)/Dockerfile .
> +build:
> + $(CONTAINER_CMD) build --no-cache --rm -t $(IMAGE_NAME) \
> + -f $(DISTRO)/Dockerfile . --build-arg=CONTAINERS_PATH=$(CONTAINERS_PATH)
> diff --git a/utilities/containers/fedora/Dockerfile 
> b/utilities/containers/fedora/Dockerfile
> index 4212e2d76..4058d7f5b 100755
> --- a/utilities/containers/fedora/Dockerfile
> +++ b/utilities/containers/fedora/Dockerfile
> @@ -1,5 +1,7 @@
>  FROM quay.io/fedora/fedora:latest
>  
> +ARG CONTAINERS_PATH
> +
>  # Update distro, install packages and clean any possible leftovers
>  RUN dnf -y update \
>  && \
> @@ -52,7 +54,7 @@ RUN git clone 
> git://git.kernel.org/pub/scm/devel/sparse/sparse.git \
>  
>  WORKDIR /workspace
>  
> -COPY py-requirements.txt /tmp/py-requirements.txt
> +COPY $CONTAINERS_PATH/py-requirements.txt /tmp/py-requirements.txt
>  
>  # Update and install pip dependencies
>  RUN python3 -m pip install --upgrade pip \

Depending on the direction we choose for patch 1/9 this one might also
need changes.

In any case, in the current form:

Acked-by: Dumitru Ceara 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 2/9] ci: Use the latest Fedora as base image

2023-03-28 Thread Dumitru Ceara
On 3/15/23 07:29, Ales Musil wrote:
> Rather than pining the version use the latest
> tag.
> 
> Signed-off-by: Ales Musil 
> ---
>  utilities/containers/fedora/Dockerfile | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/utilities/containers/fedora/Dockerfile 
> b/utilities/containers/fedora/Dockerfile
> index d24bf1232..4212e2d76 100755
> --- a/utilities/containers/fedora/Dockerfile
> +++ b/utilities/containers/fedora/Dockerfile
> @@ -1,4 +1,4 @@
> -FROM quay.io/fedora/fedora:37
> +FROM quay.io/fedora/fedora:latest
>  
>  # Update distro, install packages and clean any possible leftovers
>  RUN dnf -y update \

Looks good to me, thanks!

Acked-by: Dumitru Ceara 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-tc-offloads: Fix misaligned 8 byte read.

2023-03-28 Thread Ilya Maximets
On 3/28/23 16:21, Mike Pattrick wrote:
> On Tue, Mar 28, 2023 at 6:02 AM Ilya Maximets  wrote:
>>
>> On 3/27/23 22:32, Mike Pattrick wrote:
>>> UB Sanitizer report:
>>>
>>> lib/netdev-offload-tc.c:1276:19: runtime error: load of misaligned
>>> address 0x7f74e801976c for type 'union ovs_u128', which requires 8 byte
>>> alignment
>>>
>>> #0 in netdev_tc_flow_dump_next lib/netdev-offload-tc.c:1276
>>> #1 in netdev_flow_dump_next lib/netdev-offload.c:303
>>> #2 in dpif_netlink_flow_dump_next lib/dpif-netlink.c:1921
>>> [...]
>>>
>>
>> Thanks for the fix, Mike!
>>
>> How did you catch it?  UBsan doesn't report that for me while
>> running a check-offloads testsuite.
> 
> I compiled with gcc 11.3.1 (20221121) if that helps. Other than that,
> with current master:
> 
> # ./configure CFLAGS="-fsanitize=undefined"
> # make -j 50
> # make check-offloads TESTSUITEFLAGS="2"
> ## -- ##
> ## openvswitch 3.1.90 test suite. ##
> ## -- ##
>   2: offloads - ping between two ports - offloads enabled FAILED
> (system-offloads-traffic.at:72)
> 
> However, clang version 15.0.7 doesn't identify this spot.

Hmm, interesting.  I mostly use clang for sanitizers.

> 
>>
>>> Signed-off-by: Mike Pattrick 
>>> ---
>>>  lib/netdev-offload-tc.c | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>> index 4fb9d9f21..506b74ce7 100644
>>> --- a/lib/netdev-offload-tc.c
>>> +++ b/lib/netdev-offload-tc.c
>>> @@ -1273,7 +1273,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump 
>>> *dump,
>>>  }
>>>
>>>  if (flower.act_cookie.len) {
>>> -*ufid = *((ovs_u128 *) flower.act_cookie.data);
>>> +memcpy(ufid, flower.act_cookie.data, sizeof(ovs_u128));
>>
>> Please, don't use sizeof(type).  It's prone to errors and also
>> against the coding style.  'sizeof *ufid' should be used instead.
>>
>> What is the actual alignment of this structure?  If it's 4-bytes,
>> then we should use get_32aligned_u128() instead to be more clear
>> on what is going on here.
> 
> I'll resubmit with this correction and Eelco's excellent suggestion.
> 
> 
> Cheers,
> M
> 
>>
>> Best regards, Ilya Maximets.
>>
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2 1/9] ci: Add missing packages to the container

2023-03-28 Thread Dumitru Ceara
On 3/15/23 07:29, Ales Musil wrote:
> Add missing jemalloc, kmod and scapy packages.
> Install scapy through pip, because the
> python3-scapy package has a lot of dependencies
> that would increase the overall size of the image
> by ~800 MB.
> 
> Signed-off-by: Ales Musil 
> ---
>  utilities/containers/fedora/Dockerfile   | 2 ++
>  utilities/containers/py-requirements.txt | 1 +
>  2 files changed, 3 insertions(+)
> 
> diff --git a/utilities/containers/fedora/Dockerfile 
> b/utilities/containers/fedora/Dockerfile
> index be8cd6ff2..d24bf1232 100755
> --- a/utilities/containers/fedora/Dockerfile
> +++ b/utilities/containers/fedora/Dockerfile
> @@ -19,7 +19,9 @@ RUN dnf -y update \
>  iproute \
>  iproute-tc \
>  iputils \
> +jemalloc-devel \
>  kernel-devel \
> +kmod \
>  libcap-ng-devel \
>  libtool \
>  net-tools \

I think it might be better to follow the official installation procedure
and instead of doing this do something like:

https://github.com/ovn-org/ovn/blob/1bec9e3ddd8500793b52e11c3dc1f41ef1f48591/.ci/ovn-kubernetes/Dockerfile#L19

RUN sed -e 's/@VERSION@/0.0.1/' rhel/openvswitch-fedora.spec.in > /tmp/ovs.spec
RUN dnf builddep -y /tmp/ovs.spec

https://github.com/ovn-org/ovn/blob/1bec9e3ddd8500793b52e11c3dc1f41ef1f48591/.ci/ovn-kubernetes/Dockerfile#L28

RUN sed -e 's/@VERSION@/0.0.1/' rhel/ovn-fedora.spec.in > /tmp/ovn.spec
RUN dnf builddep -y /tmp/ovn.spec

There's still the risk that a new dependency is added and the new image
hasn't been built/pushed yet.  How do you think it's best to address
that?

> diff --git a/utilities/containers/py-requirements.txt 
> b/utilities/containers/py-requirements.txt
> index d7bd21e0d..0d90765c9 100644
> --- a/utilities/containers/py-requirements.txt
> +++ b/utilities/containers/py-requirements.txt
> @@ -1,5 +1,6 @@
>  flake8
>  hacking>=3.0
> +scapy
>  sphinx
>  setuptools
>  pyelftools

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn branch-22.12] lb: Allow IPv6 template LBs to use explicit backends.

2023-03-28 Thread Dumitru Ceara
This was working fine for IPv4 but partially by accident because IPv4
addresses don't contain ':'.  Fix it for the general case by trying to
parse explicit backends if parsing templates fails.

Also add unit and system tests for all supported cases.

Fixes: b45853fba816 ("lb: Support using templates.")
Signed-off-by: Dumitru Ceara 
Acked-by: Ales Musil 
(cherry picked from commit 7310a9859a6a5a9ad4faff1e5620e5cfa142eb7d)
---
NOTE: this is a custom backport because it picks up the tests added by
086744a645a7 ("northd: Use LB port_str in northd.").  It doesn't pick up
the rest of the changes from that commit because they're not applicable
on branch-22.12.
---
 lib/lb.c  |  51 ++---
 lib/lb.h  |   6 +-
 tests/ovn-nbctl.at|  26 +
 tests/system-ovn.at   | 126 --
 utilities/ovn-nbctl.c |   2 +-
 5 files changed, 182 insertions(+), 29 deletions(-)

diff --git a/lib/lb.c b/lib/lb.c
index c13d07b99c..b74e4d90c1 100644
--- a/lib/lb.c
+++ b/lib/lb.c
@@ -36,6 +36,7 @@ static const char *lb_neighbor_responder_mode_names[] = {
 static struct nbrec_load_balancer_health_check *
 ovn_lb_get_health_check(const struct nbrec_load_balancer *nbrec_lb,
 const char *vip_port_str, bool template);
+static void ovn_lb_backends_clear(struct ovn_lb_vip *vip);
 
 struct ovn_lb_ip_set *
 ovn_lb_ip_set_create(void)
@@ -236,6 +237,8 @@ ovn_lb_backends_init_template(struct ovn_lb_vip *lb_vip, 
const char *value_)
 ds_put_format(, "%s: should be a template of the form: "
   "'^backendip_variable1[:^port_variable1|:port]', ",
   atom);
+free(backend_port);
+free(backend_ip);
 }
 free(atom);
 }
@@ -283,8 +286,27 @@ ovn_lb_vip_init_template(struct ovn_lb_vip *lb_vip, const 
char *lb_key_,
  lb_key_);
 }
 
+/* Backends can either be templates or explicit IPs and ports. */
 lb_vip->address_family = address_family;
-return ovn_lb_backends_init_template(lb_vip, lb_value);
+lb_vip->template_backends = true;
+char *template_error = ovn_lb_backends_init_template(lb_vip, lb_value);
+
+if (template_error) {
+lb_vip->template_backends = false;
+ovn_lb_backends_clear(lb_vip);
+
+char *explicit_error = ovn_lb_backends_init_explicit(lb_vip, lb_value);
+if (explicit_error) {
+char *error =
+xasprintf("invalid backend: template (%s) OR explicit (%s)",
+  template_error, explicit_error);
+free(explicit_error);
+free(template_error);
+return error;
+}
+free(template_error);
+}
+return NULL;
 }
 
 /* Returns NULL on success, an error string on failure.  The caller is
@@ -302,15 +324,29 @@ ovn_lb_vip_init(struct ovn_lb_vip *lb_vip, const char 
*lb_key,
address_family);
 }
 
-void
-ovn_lb_vip_destroy(struct ovn_lb_vip *vip)
+static void
+ovn_lb_backends_destroy(struct ovn_lb_vip *vip)
 {
-free(vip->vip_str);
-free(vip->port_str);
 for (size_t i = 0; i < vip->n_backends; i++) {
 free(vip->backends[i].ip_str);
 free(vip->backends[i].port_str);
 }
+}
+
+static void
+ovn_lb_backends_clear(struct ovn_lb_vip *vip)
+{
+ovn_lb_backends_destroy(vip);
+vip->backends = NULL;
+vip->n_backends = 0;
+}
+
+void
+ovn_lb_vip_destroy(struct ovn_lb_vip *vip)
+{
+free(vip->vip_str);
+free(vip->port_str);
+ovn_lb_backends_destroy(vip);
 free(vip->backends);
 }
 
@@ -355,11 +391,10 @@ ovn_lb_vip_format(const struct ovn_lb_vip *vip, struct ds 
*s, bool template)
 }
 
 void
-ovn_lb_vip_backends_format(const struct ovn_lb_vip *vip, struct ds *s,
-   bool template)
+ovn_lb_vip_backends_format(const struct ovn_lb_vip *vip, struct ds *s)
 {
 bool needs_brackets = vip->address_family == AF_INET6 && vip->port_str
-  && !template;
+  && !vip->template_backends;
 for (size_t i = 0; i < vip->n_backends; i++) {
 struct ovn_lb_backend *backend = >backends[i];
 
diff --git a/lib/lb.h b/lib/lb.h
index 55becc1bf7..e5a3875609 100644
--- a/lib/lb.h
+++ b/lib/lb.h
@@ -98,6 +98,9 @@ struct ovn_lb_vip {
   */
 struct ovn_lb_backend *backends;
 size_t n_backends;
+bool template_backends; /* True if the backends are templates. False if
+ * they're explicitly specified.
+ */
 bool empty_backend_rej;
 int address_family;
 };
@@ -203,7 +206,6 @@ char *ovn_lb_vip_init(struct ovn_lb_vip *lb_vip, const char 
*lb_key,
 void ovn_lb_vip_destroy(struct ovn_lb_vip *vip);
 void ovn_lb_vip_format(const struct ovn_lb_vip *vip, struct ds *s,
bool template);
-void ovn_lb_vip_backends_format(const struct 

Re: [ovs-dev] [PATCH] netdev-tc-offloads: Fix misaligned 8 byte read.

2023-03-28 Thread Mike Pattrick
On Tue, Mar 28, 2023 at 6:02 AM Ilya Maximets  wrote:
>
> On 3/27/23 22:32, Mike Pattrick wrote:
> > UB Sanitizer report:
> >
> > lib/netdev-offload-tc.c:1276:19: runtime error: load of misaligned
> > address 0x7f74e801976c for type 'union ovs_u128', which requires 8 byte
> > alignment
> >
> > #0 in netdev_tc_flow_dump_next lib/netdev-offload-tc.c:1276
> > #1 in netdev_flow_dump_next lib/netdev-offload.c:303
> > #2 in dpif_netlink_flow_dump_next lib/dpif-netlink.c:1921
> > [...]
> >
>
> Thanks for the fix, Mike!
>
> How did you catch it?  UBsan doesn't report that for me while
> running a check-offloads testsuite.

I compiled with gcc 11.3.1 (20221121) if that helps. Other than that,
with current master:

# ./configure CFLAGS="-fsanitize=undefined"
# make -j 50
# make check-offloads TESTSUITEFLAGS="2"
## -- ##
## openvswitch 3.1.90 test suite. ##
## -- ##
  2: offloads - ping between two ports - offloads enabled FAILED
(system-offloads-traffic.at:72)

However, clang version 15.0.7 doesn't identify this spot.

>
> > Signed-off-by: Mike Pattrick 
> > ---
> >  lib/netdev-offload-tc.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > index 4fb9d9f21..506b74ce7 100644
> > --- a/lib/netdev-offload-tc.c
> > +++ b/lib/netdev-offload-tc.c
> > @@ -1273,7 +1273,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump 
> > *dump,
> >  }
> >
> >  if (flower.act_cookie.len) {
> > -*ufid = *((ovs_u128 *) flower.act_cookie.data);
> > +memcpy(ufid, flower.act_cookie.data, sizeof(ovs_u128));
>
> Please, don't use sizeof(type).  It's prone to errors and also
> against the coding style.  'sizeof *ufid' should be used instead.
>
> What is the actual alignment of this structure?  If it's 4-bytes,
> then we should use get_32aligned_u128() instead to be more clear
> on what is going on here.

I'll resubmit with this correction and Eelco's excellent suggestion.


Cheers,
M

>
> Best regards, Ilya Maximets.
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v2] lb: Allow IPv6 template LBs to use explicit backends.

2023-03-28 Thread Dumitru Ceara
On 3/24/23 15:23, Ales Musil wrote:
> On Fri, Mar 24, 2023 at 3:00 PM Dumitru Ceara  wrote:
> 
>> This was working fine for IPv4 but partially by accident because IPv4
>> addresses don't contain ':'.  Fix it for the general case by trying to
>> parse explicit backends if parsing templates fails.
>>
>> Also add unit and system tests for all supported cases.
>>
>> Fixes: b45853fba816 ("lb: Support using templates.")
>> Signed-off-by: Dumitru Ceara 
>> ---
>> V2:
>> - Addressed Ales' comments:
>>   - Improved error handling when parsing backends.
>> ---
>>  lib/lb.c  | 51 ++--
>>  lib/lb.h  |  6 +++--
>>  tests/ovn-nbctl.at| 26 +++
>>  tests/system-ovn.at   | 60 +--
>>  utilities/ovn-nbctl.c |  2 +-
>>  5 files changed, 126 insertions(+), 19 deletions(-)
>>
>> diff --git a/lib/lb.c b/lib/lb.c
>> index 66c8152750..f88c1855b7 100644
>> --- a/lib/lb.c
>> +++ b/lib/lb.c
>> @@ -38,6 +38,7 @@ static const char *lb_neighbor_responder_mode_names[] = {
>>  static struct nbrec_load_balancer_health_check *
>>  ovn_lb_get_health_check(const struct nbrec_load_balancer *nbrec_lb,
>>  const char *vip_port_str, bool template);
>> +static void ovn_lb_backends_clear(struct ovn_lb_vip *vip);
>>
>>  struct ovn_lb_ip_set *
>>  ovn_lb_ip_set_create(void)
>> @@ -238,6 +239,8 @@ ovn_lb_backends_init_template(struct ovn_lb_vip
>> *lb_vip, const char *value_)
>>  ds_put_format(, "%s: should be a template of the form:
>> "
>>
>>  "'^backendip_variable1[:^port_variable1|:port]', ",
>>atom);
>> +free(backend_port);
>> +free(backend_ip);
>>  }
>>  free(atom);
>>  }
>> @@ -285,8 +288,27 @@ ovn_lb_vip_init_template(struct ovn_lb_vip *lb_vip,
>> const char *lb_key_,
>>   lb_key_);
>>  }
>>
>> +/* Backends can either be templates or explicit IPs and ports. */
>>  lb_vip->address_family = address_family;
>> -return ovn_lb_backends_init_template(lb_vip, lb_value);
>> +lb_vip->template_backends = true;
>> +char *template_error = ovn_lb_backends_init_template(lb_vip,
>> lb_value);
>> +
>> +if (template_error) {
>> +lb_vip->template_backends = false;
>> +ovn_lb_backends_clear(lb_vip);
>> +
>> +char *explicit_error = ovn_lb_backends_init_explicit(lb_vip,
>> lb_value);
>> +if (explicit_error) {
>> +char *error =
>> +xasprintf("invalid backend: template (%s) OR explicit
>> (%s)",
>> +  template_error, explicit_error);
>> +free(explicit_error);
>> +free(template_error);
>> +return error;
>> +}
>> +free(template_error);
>> +}
>> +return NULL;
>>  }
>>
>>  /* Returns NULL on success, an error string on failure.  The caller is
>> @@ -304,15 +326,29 @@ ovn_lb_vip_init(struct ovn_lb_vip *lb_vip, const
>> char *lb_key,
>> address_family);
>>  }
>>
>> -void
>> -ovn_lb_vip_destroy(struct ovn_lb_vip *vip)
>> +static void
>> +ovn_lb_backends_destroy(struct ovn_lb_vip *vip)
>>  {
>> -free(vip->vip_str);
>> -free(vip->port_str);
>>  for (size_t i = 0; i < vip->n_backends; i++) {
>>  free(vip->backends[i].ip_str);
>>  free(vip->backends[i].port_str);
>>  }
>> +}
>> +
>> +static void
>> +ovn_lb_backends_clear(struct ovn_lb_vip *vip)
>> +{
>> +ovn_lb_backends_destroy(vip);
>> +vip->backends = NULL;
>> +vip->n_backends = 0;
>> +}
>> +
>> +void
>> +ovn_lb_vip_destroy(struct ovn_lb_vip *vip)
>> +{
>> +free(vip->vip_str);
>> +free(vip->port_str);
>> +ovn_lb_backends_destroy(vip);
>>  free(vip->backends);
>>  }
>>
>> @@ -357,11 +393,10 @@ ovn_lb_vip_format(const struct ovn_lb_vip *vip,
>> struct ds *s, bool template)
>>  }
>>
>>  void
>> -ovn_lb_vip_backends_format(const struct ovn_lb_vip *vip, struct ds *s,
>> -   bool template)
>> +ovn_lb_vip_backends_format(const struct ovn_lb_vip *vip, struct ds *s)
>>  {
>>  bool needs_brackets = vip->address_family == AF_INET6 && vip->port_str
>> -  && !template;
>> +  && !vip->template_backends;
>>  for (size_t i = 0; i < vip->n_backends; i++) {
>>  struct ovn_lb_backend *backend = >backends[i];
>>
>> diff --git a/lib/lb.h b/lib/lb.h
>> index ddd41703da..e24f519dbb 100644
>> --- a/lib/lb.h
>> +++ b/lib/lb.h
>> @@ -96,6 +96,9 @@ struct ovn_lb_vip {
>>*/
>>  struct ovn_lb_backend *backends;
>>  size_t n_backends;
>> +bool template_backends; /* True if the backends are templates. False
>> if
>> + * they're explicitly specified.
>> + */
>>  bool empty_backend_rej;
>>  int address_family;
>>  };
>> @@ -211,8 +214,7 @@ char 

Re: [ovs-dev] [PATCH ovn v2] northd, arp_proxy: response ARP at localnet lsp

2023-03-28 Thread Dumitru Ceara
On 3/24/23 10:11, Ales Musil wrote:
> On Wed, Mar 22, 2023 at 2:44 PM Enrique Llorente 
> wrote:
> 
>> The localnet LSPs skip responding ARP/NDP [1], this change relax this
>> restriction by allowing responding from localnet LSPs when owner
>> switch has a router LSP with a proper arp_proxy configuration.
>>
>> [1] ovn-northd.8.xml
>>
>> Signed-off-by: Enrique Llorente 
>> ---
>>  northd/northd.c | 19 ---
>>  northd/northd.h |  1 +
>>  northd/ovn-northd.8.xml |  7 ---
>>  tests/system-ovn.at | 34 ++
>>  4 files changed, 47 insertions(+), 14 deletions(-)
>>
>> diff --git a/northd/northd.c b/northd/northd.c
>> index 5f0b436c2..ee44a2e8b 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -2721,12 +2721,17 @@ join_logical_ports(struct northd_input *input_data,
>>   */
>>  const char *arp_proxy =
>> smap_get(>nbsp->options,"arp_proxy");
>>  int ofs = 0;
>> -if (arp_proxy &&
>> -!extract_addresses(arp_proxy, >proxy_arp_addrs, )
>> &&
>> -!extract_ip_addresses(arp_proxy, >proxy_arp_addrs)) {
>> -static struct vlog_rate_limit rl =
>> VLOG_RATE_LIMIT_INIT(1, 5);
>> -VLOG_WARN_RL(, "Invalid arp_proxy option: '%s' at lsp
>> '%s'",
>> - arp_proxy, op->nbsp->name);
>> +if (arp_proxy) {
>> +if (extract_addresses(arp_proxy, >proxy_arp_addrs,
>> ) ||
>> +extract_ip_addresses(arp_proxy,
>> >proxy_arp_addrs)) {
>> +op->od->has_arp_proxy_port = true;
>> +} else {
>> +static struct vlog_rate_limit rl =
>> +VLOG_RATE_LIMIT_INIT(1, 5);
>> +VLOG_WARN_RL(,
>> +"Invalid arp_proxy option: '%s' at lsp '%s'",
>> +arp_proxy, op->nbsp->name);
>> +}
>>  }
>>  } else if (op->nbrp && op->nbrp->peer && !op->l3dgw_port) {
>>  struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
>> @@ -8428,7 +8433,7 @@ build_lswitch_arp_nd_responder_skip_local(struct
>> ovn_port *op,
>>struct hmap *lflows,
>>struct ds *match)
>>  {
>> -if (op->nbsp && lsp_is_localnet(op->nbsp)) {
>> +if (op->nbsp && lsp_is_localnet(op->nbsp) &&
>> !op->od->has_arp_proxy_port) {
>>
> 
> This is checked at single place so we can do probably do !(op->od>
> 
> 
>>  ds_clear(match);
>>  ds_put_format(match, "inport == %s", op->json_key);
>>  ovn_lflow_add_with_lport_and_hint(lflows, op->od,
>> diff --git a/northd/northd.h b/northd/northd.h
>> index 4d9055296..5a666bbd3 100644
>> --- a/northd/northd.h
>> +++ b/northd/northd.h
>> @@ -214,6 +214,7 @@ struct ovn_datapath {
>>  bool has_unknown;
>>  bool has_acls;
>>  bool has_vtep_lports;
>> +bool has_arp_proxy_port;
>>
>>  /* IPAM data. */
>>  struct ipam_info ipam_info;
>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>> index 5d513e65a..7d400a7b8 100644
>> --- a/northd/ovn-northd.8.xml
>> +++ b/northd/ovn-northd.8.xml
>> @@ -1269,9 +1269,10 @@
>>
>>  
>>
>> -Priority-100 flows to skip the ARP responder if inport is of type
>> -localnet advances directly to the next table.  ARP
>> -requests sent to localnet ports can be received by
>> +If the logical switch has no router ports with options:arp_proxy
>> +configured add a priority-100 flows to skip the ARP responder if
>> inport
>> +is of type localnet advances directly to the next
>> table.
>> +ARP requests sent to localnet ports can be received
>> by
>>  multiple hypervisors.  Now, because the same mac binding rules are
>>  downloaded to all hypervisors, each of the multiple hypervisors
>> will
>>  respond.  This will confuse L2 learning on the source of the ARP
>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
>> index ad1188078..f820a5206 100644
>> --- a/tests/system-ovn.at
>> +++ b/tests/system-ovn.at
>> @@ -10431,6 +10431,8 @@ AT_SKIP_IF([test $HAVE_TCPDUMP = no])
>>  ovn_start
>>  OVS_TRAFFIC_VSWITCHD_START()
>>  ADD_BR([br-int])
>> +ADD_BR([br-ext])
>> +ovs-ofctl add-flow br-ext action=normal
>>
>>  # Set external-ids in br-int needed for ovn-controller
>>  ovs-vsctl \
>> @@ -10447,7 +10449,9 @@ start_daemon ovn-controller
>>  # One LR - R1 and two LSs - foo and bar, R1 has switches foo (
>> 192.168.1.0/24) and
>>  # bar (192.168.2.0/24) connected to it
>>  #
>> -#foo -- R1 -- bar
>> +#br-ext -- localnet -- foo -- R1 -- bar
>> +
>> +check ovs-vsctl set open .
>> external-ids:ovn-bridge-mappings=provider:br-ext
>>
>>  check ovn-nbctl lr-add R1
>>  check ovn-nbctl ls-add foo
>> @@ -10456,13 +10460,14 @@ check ovn-nbctl ls-add bar
>>  # Connect 

Re: [ovs-dev] [PATCH ovn] northd: Remove redundant checks.

2023-03-28 Thread Dumitru Ceara
On 3/27/23 19:43, Han Zhou wrote:
> On Mon, Mar 27, 2023 at 4:04 AM Dumitru Ceara  wrote:
>>
>> Commits 53febfbc3776 ("northd: Split switch and router datapaths.") and
>> b2f09ac55041 ("northd: Split switch ports and router ports.") made it
>> such that for most router/switch specific functions we never process
>> records that are not applicable to the function type.  Remove the
>> redundant checks and replace them with asserts as it's a bug to call the
>> functions with the wrong types of ports/datapaths.
>>
>> Signed-off-by: Dumitru Ceara 
>> ---
>> CC: Han Zhou 
>> ---
>>  northd/en-sync-sb.c|  4 +---
>>  northd/mac-binding-aging.c |  4 +++-
>>  northd/northd.c| 32 +---
>>  3 files changed, 13 insertions(+), 27 deletions(-)
>>
>> diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c
>> index 1e4e73ebcd..758f00bfd5 100644
>> --- a/northd/en-sync-sb.c
>> +++ b/northd/en-sync-sb.c
>> @@ -273,9 +273,7 @@ sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn,
>>  /* Sync router load balancer VIP generated address sets. */
>>  struct ovn_datapath *od;
>>  HMAP_FOR_EACH (od, key_node, _datapaths->datapaths) {
>> -if (!od->nbr) {
>> -continue;
>> -}
>> +ovs_assert(od->nbr);
>>
>>  if (sset_count(>lb_ips->ips_v4_reachable)) {
>>  char *ipv4_addrs_name =
> lr_lb_address_set_name(od->tunnel_key,
>> diff --git a/northd/mac-binding-aging.c b/northd/mac-binding-aging.c
>> index 6db00c1660..725c52f903 100644
>> --- a/northd/mac-binding-aging.c
>> +++ b/northd/mac-binding-aging.c
>> @@ -110,7 +110,9 @@ en_mac_binding_aging_run(struct engine_node *node,
> void *data OVS_UNUSED)
>>
>>  struct ovn_datapath *od;
>>  HMAP_FOR_EACH (od, key_node, _data->lr_datapaths.datapaths) {
>> -if (od->sb && od->nbr) {
>> +ovs_assert(od->nbr);
>> +
>> +if (od->sb) {
>>  mac_binding_aging_run_for_datapath(od->sb, od->nbr,
>>
> sbrec_mac_binding_by_datapath,
>> now, _expire_msec,
>> diff --git a/northd/northd.c b/northd/northd.c
>> index 339286be34..5afb0dc6a0 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -654,9 +654,7 @@ snat_ip_add(struct ovn_datapath *od, const char *ip,
> struct ovn_nat *nat_entry)
>>  static void
>>  init_nat_entries(struct ovn_datapath *od)
>>  {
>> -if (!od->nbr) {
>> -return;
>> -}
>> +ovs_assert(od->nbr);
>>
>>  shash_init(>snat_ips);
>>  if (get_force_snat_ip(od, "dnat", >dnat_force_snat_addrs)) {
>> @@ -755,9 +753,7 @@ destroy_nat_entries(struct ovn_datapath *od)
>>  static void
>>  init_router_external_ips(struct ovn_datapath *od)
>>  {
>> -if (!od->nbr) {
>> -return;
>> -}
>> +ovs_assert(od->nbr);
>>
>>  sset_init(>external_ips);
>>  for (size_t i = 0; i < od->nbr->n_nat; i++) {
>> @@ -841,10 +837,6 @@ destroy_lb_for_datapath(struct ovn_datapath *od)
>>  {
>>  ovn_lb_ip_set_destroy(od->lb_ips);
>>  od->lb_ips = NULL;
>> -
>> -if (!od->nbs && !od->nbr) {
>> -return;
>> -}
>>  }
>>
>>  /* A group of logical router datapaths which are connected - either
>> @@ -1072,9 +1064,7 @@ init_mcast_info_for_switch_datapath(struct
> ovn_datapath *od)
>>  static void
>>  init_mcast_flow_count(struct ovn_datapath *od)
>>  {
>> -if (od->nbr) {
>> -return;
>> -}
>> +ovs_assert(od->nbs);
>>
>>  struct mcast_switch_info *mcast_sw_info = >mcast_info.sw;
>>  atomic_init(_sw_info->active_v4_flows, 0);
>> @@ -3198,9 +3188,7 @@ ovn_update_ipv6_prefix(struct hmap *lr_ports)
>>  {
>>  const struct ovn_port *op;
>>  HMAP_FOR_EACH (op, key_node, lr_ports) {
>> -if (!op->nbrp) {
>> -continue;
>> -}
>> +ovs_assert(op->nbrp);
>>
>>  if (!smap_get_bool(>nbrp->options, "prefix", false)) {
>>  continue;
>> @@ -4092,9 +4080,7 @@ build_lbs(const struct nbrec_load_balancer_table
> *nbrec_load_balancer_table,
>>  }
>>
>>  HMAP_FOR_EACH (od, key_node, _datapaths->datapaths) {
>> -if (!od->nbr) {
>> -continue;
>> -}
>> +ovs_assert(od->nbr);
>>
>>  /* Checking load balancer groups first, starting from the
> largest one,
>>   * to more efficiently copy IP sets. */
>> @@ -4247,9 +4233,7 @@ build_lrouter_lbs_check(const struct ovn_datapaths
> *lr_datapaths)
>>  struct ovn_datapath *od;
>>
>>  HMAP_FOR_EACH (od, key_node, _datapaths->datapaths) {
>> -if (!od->nbr) {
>> -continue;
>> -}
>> +ovs_assert(od->nbr);
>>
>>  if (od->has_lb_vip && od->n_l3dgw_ports > 1
>>  && !smap_get(>nbr->options, "chassis")) {
>> @@ -6707,7 +6691,9 @@ ovn_update_ipv6_options(struct hmap *lr_ports)
>>  {
>>  struct ovn_port *op;
>>  HMAP_FOR_EACH (op, key_node, lr_ports) {
>> -if (!op->nbrp || op->nbrp->peer || !op->peer) {
>> +

Re: [ovs-dev] [PATCH v2 ovn] northd: do not centralize FIP traffic if redirect-type is set to fixed

2023-03-28 Thread Dumitru Ceara
On 3/22/23 10:13, Dumitru Ceara wrote:
> On 9/30/22 12:43, Dumitru Ceara wrote:
>> On 9/29/22 20:23, Mark Michelson wrote:
>>> Thanks Lorenzo,
>>>
>>> Acked-by: Mark Michelson 
>>
>> Thanks, Lorenzo and Mark!
>>
>> I applied this to the main branch.
> 
> We received a request to backport this bug fix all the way down to
> branch-21.12 (from Luis in CC).  I'm planning to do that but I'll wait a
> day or two to give people some time to raise any points they may have.
> 

I backported this all the way down to branch-21.12.

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] northd: Drop packets for LBs with no backends

2023-03-28 Thread Dumitru Ceara
On 3/15/23 09:11, Ales Musil wrote:
> When the LB is configured without any backend
> and doesn't report event or reject the packet
> just simply drop the packet.
> 
> Reported-at: https://bugzilla.redhat.com/2177173
> Signed-off-by: Ales Musil 
> ---

Thanks, Ales!  I applied this to main and backported it to 23.03.  Do we
need it on older branches too?  If so, please post a backport patch, we
have some conflicts there.

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] Typo fix in the ovn-nbctl man page.

2023-03-28 Thread Dumitru Ceara
On 3/28/23 15:46, Dumitru Ceara wrote:
> From: Roberto Bartzen Acosta 
> 
> According to the ovn-nbctl man8 (NAT):
> [--may-exist] [--stateless] [--gateway_port=GATEWAY_PORT]
> lr-nat-add router type external_ip logical_ip [logical_port external_mac]
> 
> Result using man page syntax:
> ovn-nbctl: unrecognized option '--gateway_port'
> 
> The option expected by the implementation is:
> --gateway-port
> 
> Signed-off-by: Roberto Bartzen Acosta 
> ---

Submitted-at: https://github.com/ovn-org/ovn/pull/184

Thanks for the contribution, Roberto!

I applied this to the main branch and I added you to the authors list.

Regards,
Dumitru

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 ovn] controller: lflow: do not use tcp as default IP protocol for ct_snat_to_vip action

2023-03-28 Thread Dumitru Ceara
On 3/24/23 09:49, Ales Musil wrote:
> On Mon, Mar 20, 2023 at 11:57 PM Lorenzo Bianconi <
> lorenzo.bianc...@redhat.com> wrote:
> 
>> Fix non-tcp haripin use-case if the load-balancer is configured without
>> ports.
>>
>> Fixes: 022ea339c8e2 ("lflow: Use learn() action to generate LB hairpin
>> reply flows.")
>> Tested-by: Ying Xu 
>> Reviewed-by: Simon Horman 
>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2157846
>> Signed-off-by: Lorenzo Bianconi 
>> ---
>> Change since v1:
>> - pass has_vip_port istead of lb_vip pointer in
>> add_lb_ct_snat_hairpin_for_dp()
>>   routine
>> ---
>>  controller/lflow.c |  25 +++---
>>  tests/ovn.at   | 115 +
>>  2 files changed, 133 insertions(+), 7 deletions(-)
>>
>> diff --git a/controller/lflow.c b/controller/lflow.c
>> index 003195ae4..0b071138d 100644
>> --- a/controller/lflow.c
>> +++ b/controller/lflow.c
>> @@ -1729,6 +1729,7 @@ add_lb_vip_hairpin_flows(const struct
>> ovn_controller_lb *lb,
>>
>>  static void
>>  add_lb_ct_snat_hairpin_for_dp(const struct ovn_controller_lb *lb,
>> +  bool has_vip_port,
>>const struct sbrec_datapath_binding
>> *datapath,
>>const struct hmap *local_datapaths,
>>struct match *dp_match,
>> @@ -1742,12 +1743,20 @@ add_lb_ct_snat_hairpin_for_dp(const struct
>> ovn_controller_lb *lb,
>>  match_set_metadata(dp_match, htonll(datapath->tunnel_key));
>>  }
>>
>> +uint16_t priority = datapath ? 200 : 100;
>> +if (!has_vip_port) {
>> +/* If L4 ports are not specified for the current LB, we will
>> decrease
>> + * the flow priority in order to not collide with other LBs with
>> more
>> + * fine-grained configuration.
>> + */
>> +priority -= 10;
>> +}
>>  /* A flow added for the "hairpin_snat_ip" case will have an extra
>>   * datapath match, but it will also match on the less restrictive
>>   * general case.  Therefore, we set the priority in the
>>   * "hairpin_snat_ip" case to be higher than the general case. */
>>  ofctrl_add_flow(flow_table, OFTABLE_CT_SNAT_HAIRPIN,
>> -datapath ? 200 : 100, lb->slb->header_.uuid.parts[0],
>> +priority, lb->slb->header_.uuid.parts[0],
>>  dp_match, dp_acts, >slb->header_.uuid);
>>  }
>>
>> @@ -1834,8 +1843,8 @@ add_lb_ct_snat_hairpin_vip_flow(const struct
>> ovn_controller_lb *lb,
>>  }
>>  }
>>
>> -match_set_nw_proto(, lb->proto);
>>  if (lb_vip->vip_port) {
>> +match_set_nw_proto(, lb->proto);
>>  if (!lb->hairpin_orig_tuple) {
>>  match_set_ct_nw_proto(, lb->proto);
>>  match_set_ct_tp_dst(, htons(lb_vip->vip_port));
>> @@ -1852,18 +1861,20 @@ add_lb_ct_snat_hairpin_vip_flow(const struct
>> ovn_controller_lb *lb,
>>  }
>>
>>  if (!use_hairpin_snat_ip) {
>> -add_lb_ct_snat_hairpin_for_dp(lb, NULL, NULL,
>> +add_lb_ct_snat_hairpin_for_dp(lb, !!lb_vip->vip_port, NULL, NULL,
>>, , flow_table);
>>  } else {
>>  for (size_t i = 0; i < lb->slb->n_datapaths; i++) {
>> -add_lb_ct_snat_hairpin_for_dp(lb, lb->slb->datapaths[i],
>> -  local_datapaths,
>> -  , , flow_table);
>> +add_lb_ct_snat_hairpin_for_dp(lb, !!lb_vip->vip_port,
>> +  lb->slb->datapaths[i],
>> +  local_datapaths, ,
>> +  , flow_table);
>>  }
>>  if (lb->slb->datapath_group) {
>>  for (size_t i = 0; i < lb->slb->datapath_group->n_datapaths;
>> i++) {
>>  add_lb_ct_snat_hairpin_for_dp(
>> -lb, lb->slb->datapath_group->datapaths[i],
>> +lb, !!lb_vip->vip_port,
>> +lb->slb->datapath_group->datapaths[i],
>>  local_datapaths, , , flow_table);
>>  }
>>  }
>> diff --git a/tests/ovn.at b/tests/ovn.at
>> index c2883ffca..207c16295 100644
>> --- a/tests/ovn.at
>> +++ b/tests/ovn.at
>> @@ -29500,7 +29500,9 @@ OVS_WAIT_UNTIL([test x$(ovn-nbctl lsp-get-up
>> sw1-p1) = xup])
>>
>>  check ovn-nbctl lb-add lb-ipv4-tcp 88.88.88.88:8080 42.42.42.1:4041 tcp
>>  check ovn-nbctl lb-add lb-ipv4-udp 88.88.88.88:4040 42.42.42.1:2021 udp
>> +check ovn-nbctl lb-add lb-ipv4 88.88.88.89 42.42.42.2
>>  check ovn-nbctl lb-add lb-ipv6-tcp [[8800::0088]]:8080 [[4200::1]]:4041
>> tcp
>> +check ovn-nbctl lb-add lb-ipv6 8800::0089 4200::2
>>  check ovn-nbctl --wait=hv lb-add lb-ipv6-udp [[8800::0088]]:4040
>> [[4200::1]]:2021 udp
>>
>>  AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | grep -v NXST],
>> [1], [dnl
>> @@ -29791,6 +29793,119 @@ AT_CHECK([as 

Re: [ovs-dev] [PATCH ovn v2] northd: add router broadcast option to logical switch

2023-03-28 Thread Dumitru Ceara
On 3/24/23 10:11, Ales Musil wrote:
> On Mon, Mar 20, 2023 at 9:04 AM Felix Hüttner via dev <
> ovs-dev@openvswitch.org> wrote:
> 
>> Assume the following setup:
>>
>> ++
>> | Logical Router |
>> | lr001  +-+
>> ++ |
>>|
>> ++ |
>> | Logical Router | | ++ +--+
>> | lr002  +-+-+ Logical Switch +-+ Phyiscal Network |
>> ++ | | ls-ext | |  |
>>| ++ +--+
>>   ...  |
>>|
>> ++ |
>> | Logical Router | |
>> | lr300  +-+
>> ++
>>
>> If a arp request for the ip of lr001 on ls-ext is now received it is
>> only forwarded to that individual logical router.
>>
>> If we however now receive a arp request for an ip not used by any of
>> lr001-lr300 we try to flood the arp request to all logical ports on ls-ext.
>> With around 300 routers this causes the arp request to be dropped after
>> some routers as we hit the 4096 resubmit limit.
>>
>> In the most cases forwarding the arp requests to the logical routers is
>> pointless as we already know all of their ip addresses and they will
>> therefor not be able to answer the arp requests anyway.
>> Only if someone sends garps this is not the case. Then the request would
>> need to be flooded to all logical routers.
>>
>> We can therefor not generally send these arp requests to MC_FLOOD_L2 as
>> this would break garps. As we can also not detect garps we need to leave
>> the solution to our users.
>>
>> To do this we introduce the other_config `broadcast-arps-to-all-routers`
>> on logical switches (which is per default true). If set to false we add
>> a logical flow that forwards arp requests where we do not know a
>> specific target logical switch port to MC_FLOOD_L2, thereby bypassing
>> all logical routers.
>>
>> Signed-off-by: Felix Huettner 
>> ---
>>  NEWS|  5 +
>>  northd/northd.c |  8 
>>  northd/ovn-northd.8.xml |  7 +++
>>  ovn-nb.xml  | 12 
>>  tests/ovn-northd.at | 31 +++
>>  5 files changed, 63 insertions(+)
>>
>> diff --git a/NEWS b/NEWS
>> index 637adcff3..2379f5089 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -2,6 +2,11 @@ Post v23.03.0
>>  -
>>- Enhance LSP.options:arp_proxy to support IPv6, configurable MAC
>>  addresses and CIDRs.
>> +  - Add LS.other_config:broadcast-arps-to-all-routers. If false then arp
>> +requests are only send to Logical Routers on that Logical Switch if
>> the
>> +target mac address matches. Arp requests matching no Logical Router
>> will
>> +only be forwarded to non-router ports. Default is true which keeps the
>> +existing behaviour of flooding these arp requests to all attached
>> Ports.
>>
>>  OVN v23.03.0 - 03 Mar 2023
>>  --
>> diff --git a/northd/northd.c b/northd/northd.c
>> index 5f0b436c2..be6d70d94 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -9030,6 +9030,14 @@ build_lswitch_destination_lookup_bmcast(struct
>> ovn_datapath *od,
>>  }
>>  }
>>
>> +
>> +if (!smap_get_bool(>nbs->other_config,
>> +   "broadcast-arps-to-all-routers", true)) {
>> +ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 72,
>> +"eth.mcast && (arp.op == 1 || nd_ns)",
>> +"outport = \""MC_FLOOD_L2"\"; output;");
>> +}
>> +
>>  ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 70, "eth.mcast",
>>"outport = \""MC_FLOOD"\"; output;");
>>  }
>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>> index 5d513e65a..3d5f579fe 100644
>> --- a/northd/ovn-northd.8.xml
>> +++ b/northd/ovn-northd.8.xml
>> @@ -1880,6 +1880,13 @@ output;
>>  non-router logical ports.
>>
>>
>> +  
>> +A priority-72 flow that outputs all ARP requests and ND packets
>> with
>> +an Ethernet broadcast or multicast eth.dst to the
>> +MC_FLOOD_L2 multicast group if
>> +other_config:broadcast-arps-to-all-routers=true.
>> +  
>> +
>>
>>  A priority-70 flow that outputs all packets with an Ethernet
>> broadcast
>>  or multicast eth.dst to the MC_FLOOD
>> diff --git a/ovn-nb.xml b/ovn-nb.xml
>> index 73f707aa0..d106af8be 100644
>> --- a/ovn-nb.xml
>> +++ b/ovn-nb.xml
>> @@ -729,6 +729,18 @@
>>  localnet ports, fabric traffic that belongs to other tagged
>> networks may
>>  be passed through such a port.
>>
>> +
>> +  > +  type='{"type": "boolean"}'>
>> +Determines whether arp requests and ipv6 neighbor solicitations
>> should
>> +be send to all routers and other switchports (default) or if it
>> should
>> +only be send to switchports where the ip/mac 

Re: [ovs-dev] [PATCH ovn] Typo fix in the ovn-nbctl man page.

2023-03-28 Thread 0-day Robot
Bleep bloop.  Greetings Dumitru Ceara, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 275 characters long (recommended limit is 79)
#30 FILE: utilities/ovn-nbctl.8.xml:1154:
  [--may-exist] [--stateless] 
[--gateway-port=GATEWAY_PORT] lr-nat-add 
router type external_ip logical_ip 
[logical_port external_mac]

Lines checked: 37, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn] Typo fix in the ovn-nbctl man page.

2023-03-28 Thread Dumitru Ceara
From: Roberto Bartzen Acosta 

According to the ovn-nbctl man8 (NAT):
[--may-exist] [--stateless] [--gateway_port=GATEWAY_PORT]
lr-nat-add router type external_ip logical_ip [logical_port external_mac]

Result using man page syntax:
ovn-nbctl: unrecognized option '--gateway_port'

The option expected by the implementation is:
--gateway-port

Signed-off-by: Roberto Bartzen Acosta 
---
 utilities/ovn-nbctl.8.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index 72d4088f0c..54dbdb791e 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -1151,7 +1151,7 @@
 NAT Commands
 
 
-  [--may-exist] [--stateless] 
[--gateway_port=GATEWAY_PORT] lr-nat-add 
router type external_ip logical_ip 
[logical_port external_mac]
+  [--may-exist] [--stateless] 
[--gateway-port=GATEWAY_PORT] lr-nat-add 
router type external_ip logical_ip 
[logical_port external_mac]
   
 
   Adds the specified NAT to router.
-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v11 4/5] userspace: Add SRv6 tunnel support.

2023-03-28 Thread Ilya Maximets
On 3/28/23 08:42, Nobuhiro MIKI wrote:
> SRv6 (Segment Routing IPv6) tunnel vport is responsible
> for encapsulation and decapsulation the inner packets with
> IPv6 header and an extended header called SRH
> (Segment Routing Header). See spec in:
> 
> https://datatracker.ietf.org/doc/html/rfc8754
> 
> This patch implements SRv6 tunneling in userspace datapath.
> It uses `remote_ip` and `local_ip` options as with existing
> tunnel protocols. It also adds a dedicated `srv6_segs` option
> to define a sequence of routers called segment list.
> 
> Signed-off-by: Nobuhiro MIKI 
> ---
>  Documentation/faq/configuration.rst |  21 +
>  Documentation/faq/releases.rst  |   1 +
>  NEWS|   2 +
>  include/linux/openvswitch.h |   1 +
>  include/sparse/netinet/in.h |   1 +
>  lib/dpif-netlink-rtnl.c |   5 ++
>  lib/dpif-netlink.c  |   5 ++
>  lib/netdev-native-tnl.c | 127 
>  lib/netdev-native-tnl.h |  10 +++
>  lib/netdev-vport.c  |  53 
>  lib/netdev.h|   4 +
>  lib/packets.h   |  11 +++
>  lib/tnl-ports.c |   5 +-
>  ofproto/ofproto-dpif-xlate.c|   3 +
>  tests/system-kmod-macros.at |   8 ++
>  tests/system-traffic.at | 119 ++
>  tests/system-userspace-macros.at|   6 ++
>  tests/tunnel.at |  56 
>  18 files changed, 437 insertions(+), 1 deletion(-)
> 



> +int
> +netdev_srv6_build_header(const struct netdev *netdev,
> + struct ovs_action_push_tnl *data,
> + const struct netdev_tnl_build_header_params *params)
> +{
> +struct netdev_vport *dev = netdev_vport_cast(netdev);
> +struct netdev_tunnel_config *tnl_cfg;
> +const struct in6_addr *segs;
> +struct srv6_base_hdr *srh;
> +struct in6_addr *s;
> +ovs_be16 dl_type;
> +int err = 0;
> +int nr_segs;
> +int i;
> +
> +ovs_mutex_lock(>mutex);
> +tnl_cfg = >tnl_cfg;
> +
> +if (tnl_cfg->srv6_num_segs) {
> +nr_segs = tnl_cfg->srv6_num_segs;
> +segs = tnl_cfg->srv6_segs;
> +} else {
> +/*
> + * If explicit segment list setting is omitted, tunnel destination
> + * is considered to be the first segment list.
> + */
> +nr_segs = 1;
> +segs = >flow->tunnel.ipv6_dst;
> +}
> +
> +if (!ipv6_addr_equals([0], >flow->tunnel.ipv6_dst)) {
> +err = EINVAL;
> +goto out;
> +}
> +
> +srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING);
> +srh->rt_hdr.segments_left = nr_segs - 1;
> +srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
> +srh->rt_hdr.hdrlen = 2 * nr_segs;
> +srh->last_entry = nr_segs - 1;
> +srh->flags = 0;
> +srh->tag = 0;
> +
> +dl_type = params->flow->dl_type;
> +if (dl_type == htons(ETH_TYPE_IP)) {
> +srh->rt_hdr.nexthdr = IPPROTO_IPIP;
> +} else if (dl_type == htons(ETH_TYPE_IPV6)) {
> +srh->rt_hdr.nexthdr = IPPROTO_IPV6;
> +}

We should probably error out here if for some reason it's not an IP or IPv6.

> +
> +s = ALIGNED_CAST(struct in6_addr *,
> + (char *) srh + sizeof *srh);
> +for (i = 0; i < nr_segs; i++) {
> +/* Segment list is written to the header in reverse order. */
> +memcpy(s, [nr_segs - i - 1], sizeof *s);
> +s++;
> +}
> +
> +data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
> +data->tnl_type = OVS_VPORT_TYPE_SRV6;
> +out:
> +ovs_mutex_unlock(>mutex);
> +
> +return err;
> +}
> +
> +void
> +netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
> +struct dp_packet *packet OVS_UNUSED,
> +const struct ovs_action_push_tnl *data OVS_UNUSED)

'packet' and 'data' are actually used by the function.

> +{
> +int ip_tot_size;
> +
> +netdev_tnl_push_ip_header(packet, data->header,
> +  data->header_len, _tot_size);
> +}
> +



> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index a9cf3cbee0be..15c814d6285b 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -3632,6 +3632,9 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, 
> struct eth_addr dmac,
>  case OVS_VPORT_TYPE_BAREUDP:
>  nw_proto = IPPROTO_UDP;
>  break;
> +case OVS_VPORT_TYPE_SRV6:
> +nw_proto = IPPROTO_IPIP;

Sorry, just noticed that we're setting nw_proto to IPIP
always after encapsulation, even if the packet was IPv6.
We should check the dl_type of the original flow and set
the nw_proto appropriately.

You can check that the ping6 test fails if we request to
drop encapsulated IPv4 packets like this:

diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 

Re: [ovs-dev] Reliability of system-offload test #50 [Was: Re: [PATCH v3 2/2] ci: Run tc offload tests in GitHub] Actions.

2023-03-28 Thread Eelco Chaudron


On 10 Mar 2023, at 17:20, Simon Horman wrote:

> On Fri, Mar 10, 2023 at 10:15:44AM +0100, Simon Horman wrote:
>> On Thu, Mar 09, 2023 at 05:22:43PM +0100, Eelco Chaudron wrote:
>>>
>>>
>>> On 9 Mar 2023, at 15:42, Simon Horman wrote:
>>>
 On Wed, Mar 08, 2023 at 04:18:47PM +0100, Eelco Chaudron wrote:
> Run "make check-offloads" as part of the GitHub actions tests.
>
> This test was run 25 times using GitHub actions, and the
> failing rerun test cases where excluded. There are quite some
> first-run failures, but unfortunately, there is no other
> more stable kernel available as a GitHub-hosted runner.
>
> Did not yet include sanitizers in the run, as it's causing
> the test to run too long >30min and there seems to be (timing)
> issues with some of the tests.
>
> Signed-off-by: Eelco Chaudron 

 Hi Eelco,

 I like this patch a lot.
 But I am observing reliability problems when executing the new job.

 For 5 runs, on each occasion some tests failed the first time.
 And on 3 of those runs at least one test failed on the recheck,
 so the job failed.
>>>
>>> Damn :)
>>
>> Yes, it pained me to report this.
>>
>>> I did 25 runs (I did not check for re-runs), and they were fine. I also 
>>> cleaned up my jobs recently, so I no longer have them.
>>>
>>> I can do this again and figure out wich tests are failing. Then analyze the 
>>> failures to see if we need to exclude them or can fine-tune them.
>>
>> I will see if I can spend some cycles on reproducing this (outside of GHA).
>> I'll likely start with the tests that show up in the summary below.
>
> I started off by looking at check-offloads test:
>
> 50. system-traffic.at:1524: testing datapath - basic truncate action ...
>
> I haven't dug into the code to debug the problem yet.
> But I have collected some results that might be interesting.
>
>
> My testing was on a low-end VM with Ubuntu 18.04, with no HW offload:
> $ uname -psv
> Linux #56-Ubuntu SMP Tue Sep 20 13:23:26 UTC 2022 x86_64


So I took the same approach, I have local vagrant VM with ubuntu 22.11 (like on 
GitHub) and ran the tests.

I thought I fixed it by this old commit:

  
https://github.com/openvswitch/ovs/commit/22968988b820aa17a9b050c901208b7d4bed9dac

However, as you can see even after excluding the remaining failures I could not 
figure out, it still fails randomly:

  https://github.com/chaudron/ovs/actions

Note that the above 25x runs I did before and none of the above tests failed…

I was not able to make any of these tests fail on my local Ubuntu, and also 
analysing the results did not lead to a specific thing to fix.

As this is working fine on my Fedora (VM) setup for multiple runs without any 
problem I’ll abandon this patch now :( I’ll try to get a buy-in form the Robot 
to run the datapath tests as part of its sanity check for now…

//Eelco

> Using the latest main branch:
> d2f6fbe9fe6c ("ofproto-dpif-upcall: Remove redundant time_msec() in 
> revalidate().")
>
> I ran this in a for loop that exited on failure.
>
> for i in $(seq 50); do echo $i; TESTSUITEFLAGS="50 -v" make check-offloads >& 
> 50.log || break; done
>
> I did this 5 times.
> On one run it failed 1st go, on 3 other runs it failed in less than 10
> iterations, and on the another it made it to the 16th iteration.
>
> On one of the runs the error looked like this:
>
> ...
> ./system-traffic.at:1573: ovs-appctl revalidator/purge
> ./system-traffic.at:1574: ovs-ofctl dump-flows br0 table=0 | grep "in_port=5" 
> | sed -n 's/.*\(n\_bytes=[0-9]*\).*/\1/p'
> --- /dev/null   2023-03-10 15:43:42.94800 +
> +++ /home/horms/ovs/tests/system-offloads-testsuite.dir/at-groups/50/stderr   
>   2023-03-10 16:00:02.904493675 +
> @@ -0,0 +1 @@
> +ovs-ofctl: br0: failed to connect to socket (Connection reset by peer)
> ...
>
> The others looked like this:
>
>
> ./system-traffic.at:1620: ovs-appctl revalidator/purge
> ./system-traffic.at:1621: ovs-ofctl dump-flows br0 table=0 | grep "in_port=3" 
> | sed -n 's/.*\(n\_bytes=[0-9]*\).*/\1/p'
> ./system-traffic.at:1626: ovs-ofctl dump-flows br0 table=0 | grep "in_port=5" 
> | sed -n 's/.*\(n\_bytes=[0-9]*\).*/\1/p'
> ./system-traffic.at:1630: check_logs
> --- /dev/null   2023-03-10 15:43:42.94800 +
> +++ /home/horms/ovs/tests/system-offloads-testsuite.dir/at-groups/50/stdout   
>   2023-03-10 16:03:06.996925562 +
> @@ -0,0 +1,2 @@
> +2023-03-10T16:03:04.666Z|00137|dpif|WARN|system@ovs-system: failed to 
> flow_del (No such file or directory) 
> ufid:9505babe-8c30-4ee3-bd58-aa4f6f310219 
> recirc_id(0),dp_hash(0),skb_priority(0),in_port(6),skb_mark(0xd4),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),eth(src=a6:02:03:38:97:15,dst=33:33:00:00:00:16),eth_type(0x86dd),ipv6(src=fe80::a402:3ff:fe38:9715,dst=ff02::16,label=0,proto=58,tclass=0,hlimit=1,frag=no),icmpv6(type=143,code=0)
> +2023-03-10T16:03:04.666Z|00138|dpif|WARN|system@ovs-system: failed to 
> flow_del (No such file 

Re: [ovs-dev] [PATCH v5] lib, ovsdb, ovs-vsctl, vtep-ctl: Fix multiple Coverity defects

2023-03-28 Thread Ilya Maximets
On 3/27/23 11:03, Eelco Chaudron wrote:
> 
> 
> On 16 Mar 2023, at 17:36, James Raphael Tiovalen wrote:
> 
>> This commit addresses several high and medium-impact Coverity defects by
>> fixing several possible null-pointer dereferences and potentially
>> uninitialized variables.
>>
>> There were cases when crashes were encountered when some null pointers
>> were dereferenced. Null pointer checks and alternative code paths have
>> been added to prevent unwanted crashes. Additionally, some struct
>> variables were not initialized. Zero-initializations have been added to
>> prevent potential data leaks or undefined behavior.
>>
>> Some variables would always be initialized by either the code flow or
>> the compiler and some pointers would never be null. Thus, some Coverity
>> reports might be false positives. That said, it is still considered best
>> practice to perform null pointer checks and initialize variables upfront
>> just in case to ensure the overall resilience and security of OVS, as
>> long as they do not impact performance-critical code. As a bonus, it
>> would also make static analyzer tools, such as Coverity, happy.
>>
>> Unit tests have been successfully executed via `make check` and they
>> successfully passed.
> 
> I did not review this, but I noticed you do null checks for memset() and 
> memcpy(). But there are functions for this in OVS like nullable_memset() and 
> nullable_memcpy(), maybe some of the changes might benefit from using this.
> 
> In addition, there is also a nullable_string_is_equal() which might be useful 
> in some cases.

I'd also say that the vast majority of changes here should
be replaced with assertions, because they are checking for
impossible conditions.  For example, there should be no way
the name and tables are not set after successful parser run
in ovsdb_schema_from_json(), because they are not optional.
Explicit handling of impossible cases is confusing for readers.
Use of assertions will also reduce the patch size significantly.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-tc-offloads: Fix misaligned 8 byte read.

2023-03-28 Thread Eelco Chaudron



On 27 Mar 2023, at 22:32, Mike Pattrick wrote:

> UB Sanitizer report:
>
> lib/netdev-offload-tc.c:1276:19: runtime error: load of misaligned
> address 0x7f74e801976c for type 'union ovs_u128', which requires 8 byte
> alignment
>
> #0 in netdev_tc_flow_dump_next lib/netdev-offload-tc.c:1276
> #1 in netdev_flow_dump_next lib/netdev-offload.c:303
> #2 in dpif_netlink_flow_dump_next lib/dpif-netlink.c:1921
> [...]
>
> Signed-off-by: Mike Pattrick 
> ---
>  lib/netdev-offload-tc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 4fb9d9f21..506b74ce7 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1273,7 +1273,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
>  }
>
>  if (flower.act_cookie.len) {

The fix looks good to me, but should we maybe also add some minimal size check?

If (flower.act_cokkie.len >= sizeof(ovs_u128), or an exact match assuming this 
really is a ufid? The latter might need some research to make sure we are not 
returning a longer length due to some padding that could happen, etc.

> -*ufid = *((ovs_u128 *) flower.act_cookie.data);
> +memcpy(ufid, flower.act_cookie.data, sizeof(ovs_u128));
>  } else if (!find_ufid(netdev, , ufid)) {
>  continue;
>  }
> -- 
> 2.39.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-tc-offloads: Fix misaligned 8 byte read.

2023-03-28 Thread Ilya Maximets
On 3/27/23 22:32, Mike Pattrick wrote:
> UB Sanitizer report:
> 
> lib/netdev-offload-tc.c:1276:19: runtime error: load of misaligned
> address 0x7f74e801976c for type 'union ovs_u128', which requires 8 byte
> alignment
> 
> #0 in netdev_tc_flow_dump_next lib/netdev-offload-tc.c:1276
> #1 in netdev_flow_dump_next lib/netdev-offload.c:303
> #2 in dpif_netlink_flow_dump_next lib/dpif-netlink.c:1921
> [...]
> 

Thanks for the fix, Mike!

How did you catch it?  UBsan doesn't report that for me while
running a check-offloads testsuite.

> Signed-off-by: Mike Pattrick 
> ---
>  lib/netdev-offload-tc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 4fb9d9f21..506b74ce7 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1273,7 +1273,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
>  }
>  
>  if (flower.act_cookie.len) {
> -*ufid = *((ovs_u128 *) flower.act_cookie.data);
> +memcpy(ufid, flower.act_cookie.data, sizeof(ovs_u128));

Please, don't use sizeof(type).  It's prone to errors and also
against the coding style.  'sizeof *ufid' should be used instead.

What is the actual alignment of this structure?  If it's 4-bytes,
then we should use get_32aligned_u128() instead to be more clear
on what is going on here.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn v4 3/3] controller: Prevent race in packet buffering

2023-03-28 Thread Ales Musil
here was a race within packet buffering that could
result in first packt being dropped. It could happen
under following conditions and topology:
S1 == R1 == public == R2 == S2
SNAT on R1 and DGP on port connecting R1 with public.

1) The GARP is sent for the GDP SNAT
2) The GARP is delayed on R2 because it's multicast
3) Some traffic that get's buffered on S2
4) An ARP is sent as consequence of the buffering
5) The delayed MAC binding is added to SB
6) Response for the ARP is ignored because the MAC binding
already exists
7) The buffered packet is never sent out and times out

In order to prevent this behavior use the recent MAC bidning
node.

Signed-off-by: Ales Musil 
---
v4: Rebase on top of current main.
Split into three patches.
---
 controller/ovn-controller.c |  7 ++--
 controller/pinctrl.c| 67 +
 controller/pinctrl.h|  5 ++-
 3 files changed, 14 insertions(+), 65 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index d465f00e9..ce0976070 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -5033,6 +5033,8 @@ main(int argc, char *argv[])
 engine_get_internal_data(_template_vars);
 struct ed_type_lb_data *lb_data =
 engine_get_internal_data(_lb_data);
+struct hmap *recent_mac_bindings =
+engine_get_internal_data(_recent_mac_bindings);
 
 ofctrl_init(_output_data->group_table,
 _output_data->meter_table,
@@ -5385,14 +5387,13 @@ main(int argc, char *argv[])
 ovnsb_idl_loop.idl),
 sbrec_service_monitor_table_get(
 ovnsb_idl_loop.idl),
-sbrec_mac_binding_table_get(
-ovnsb_idl_loop.idl),
 sbrec_bfd_table_get(ovnsb_idl_loop.idl),
 br_int, chassis,
 _data->local_datapaths,
 _data->active_tunnels,
 _data->local_active_ports_ipv6_pd,
-_data->local_active_ports_ras);
+_data->local_active_ports_ras,
+recent_mac_bindings);
 stopwatch_stop(PINCTRL_RUN_STOPWATCH_NAME,
time_msec());
 mirror_run(ovs_idl_txn,
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index fcc278c8c..66dcb856c 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -180,9 +180,7 @@ static struct pinctrl pinctrl;
 
 static void init_buffered_packets_data(void);
 static void destroy_buffered_packets_data(void);
-static void
-run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
- const struct sbrec_mac_binding_table *mac_binding_table)
+static void run_buffered_binding(const struct hmap *recent_mbs)
 OVS_REQUIRES(pinctrl_mutex);
 
 static void pinctrl_handle_put_mac_binding(const struct flow *md,
@@ -3448,14 +3446,14 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 const struct sbrec_dns_table *dns_table,
 const struct sbrec_controller_event_table *ce_table,
 const struct sbrec_service_monitor_table *svc_mon_table,
-const struct sbrec_mac_binding_table *mac_binding_table,
 const struct sbrec_bfd_table *bfd_table,
 const struct ovsrec_bridge *br_int,
 const struct sbrec_chassis *chassis,
 const struct hmap *local_datapaths,
 const struct sset *active_tunnels,
 const struct shash *local_active_ports_ipv6_pd,
-const struct shash *local_active_ports_ras)
+const struct shash *local_active_ports_ras,
+const struct hmap *recent_mac_bindings)
 {
 ovs_mutex_lock(_mutex);
 run_put_mac_bindings(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
@@ -3478,7 +3476,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
   sbrec_port_binding_by_key,
   sbrec_igmp_groups,
   sbrec_ip_multicast_opts);
-run_buffered_binding(sbrec_port_binding_by_name, mac_binding_table);
+run_buffered_binding(recent_mac_bindings);
 sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
   chassis);
 bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name,
@@ -4269,67 +4267,18 @@ run_put_mac_bindings(struct ovsdb_idl_txn 
*ovnsb_idl_txn,
 }
 
 static void
-run_buffered_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
- const struct sbrec_mac_binding_table *mac_binding_table)
+run_buffered_binding(const struct hmap *recent_mbs)
 OVS_REQUIRES(pinctrl_mutex)
 {
-if 

[ovs-dev] [PATCH ovn v4 1/3] pinctrl: Simplify packet buffering

2023-03-28 Thread Ales Musil
Simplify the logic behind packet buffering
and house those simplified helpers
in mac-learn.c.

Signed-off-by: Ales Musil 
---
v4: Rebase on top of current main.
Split into three patches.
---
 controller/mac-learn.c | 171 ++---
 controller/mac-learn.h |  50 ++-
 controller/pinctrl.c   | 329 -
 3 files changed, 298 insertions(+), 252 deletions(-)

diff --git a/controller/mac-learn.c b/controller/mac-learn.c
index a27607016..9d51693a7 100644
--- a/controller/mac-learn.c
+++ b/controller/mac-learn.c
@@ -18,10 +18,10 @@
 #include "mac-learn.h"
 
 /* OpenvSwitch lib includes. */
+#include "dp-packet.h"
 #include "openvswitch/poll-loop.h"
 #include "openvswitch/vlog.h"
 #include "lib/packets.h"
-#include "lib/random.h"
 #include "lib/smap.h"
 #include "lib/timeval.h"
 
@@ -29,11 +29,11 @@ VLOG_DEFINE_THIS_MODULE(mac_learn);
 
 #define MAX_MAC_BINDINGS 1000
 #define MAX_FDB_ENTRIES  1000
-#define MAX_MAC_BINDING_DELAY_MSEC 50
+#define BUFFER_QUEUE_DEPTH 4
 
-static size_t mac_binding_hash(uint32_t dp_key, uint32_t port_key,
-   struct in6_addr *);
-static struct mac_binding *mac_binding_find(struct hmap *mac_bindings,
+static size_t keys_ip_hash(uint32_t dp_key, uint32_t port_key,
+   struct in6_addr *);
+static struct mac_binding *mac_binding_find(const struct hmap *mac_bindings,
 uint32_t dp_key,
 uint32_t port_key,
 struct in6_addr *ip, size_t hash);
@@ -41,6 +41,12 @@ static size_t fdb_entry_hash(uint32_t dp_key, struct 
eth_addr *);
 
 static struct fdb_entry *fdb_entry_find(struct hmap *fdbs, uint32_t dp_key,
 struct eth_addr *mac, size_t hash);
+static struct buffered_packets * buffered_packets_find(struct hmap *hmap,
+   uint64_t dp_key,
+   uint64_t port_key,
+   struct in6_addr *ip,
+   uint32_t hash);
+static void buffered_packets_destroy(struct buffered_packets *bp);
 
 /* mac_binding functions. */
 void
@@ -62,24 +68,23 @@ ovn_mac_bindings_destroy(struct hmap *mac_bindings)
 struct mac_binding *
 ovn_mac_binding_add(struct hmap *mac_bindings, uint32_t dp_key,
 uint32_t port_key, struct in6_addr *ip,
-struct eth_addr mac, bool is_unicast)
+struct eth_addr mac, uint32_t timestamp_offset,
+bool limited_capacity)
 {
-uint32_t hash = mac_binding_hash(dp_key, port_key, ip);
+uint32_t hash = keys_ip_hash(dp_key, port_key, ip);
 
 struct mac_binding *mb =
 mac_binding_find(mac_bindings, dp_key, port_key, ip, hash);
 if (!mb) {
-if (hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) {
+if (limited_capacity && hmap_count(mac_bindings) >= MAX_MAC_BINDINGS) {
 return NULL;
 }
 
-uint32_t delay = is_unicast
-? 0 : random_range(MAX_MAC_BINDING_DELAY_MSEC) + 1;
 mb = xmalloc(sizeof *mb);
 mb->dp_key = dp_key;
 mb->port_key = port_key;
 mb->ip = *ip;
-mb->commit_at_ms = time_msec() + delay;
+mb->expire = time_msec() + timestamp_offset;
 hmap_insert(mac_bindings, >hmap_node, hash);
 }
 mb->mac = mac;
@@ -94,7 +99,7 @@ ovn_mac_binding_wait(struct hmap *mac_bindings)
 struct mac_binding *mb;
 
 HMAP_FOR_EACH (mb, hmap_node, mac_bindings) {
-poll_timer_wait_until(mb->commit_at_ms);
+poll_timer_wait_until(mb->expire);
 }
 }
 
@@ -106,9 +111,9 @@ ovn_mac_binding_remove(struct mac_binding *mb, struct hmap 
*mac_bindings)
 }
 
 bool
-ovn_mac_binding_can_commit(const struct mac_binding *mb, long long now)
+ovn_mac_binding_is_expired(const struct mac_binding *mb, long long now)
 {
-return now >= mb->commit_at_ms;
+return now >= mb->expire;
 }
 
 /* fdb functions. */
@@ -158,17 +163,126 @@ ovn_fdb_add(struct hmap *fdbs, uint32_t dp_key, struct 
eth_addr mac,
 
 }
 
+/* packet buffering functions */
+struct buffered_packets *
+ovn_buffered_packets_add(struct hmap *hmap, uint64_t dp_key, uint64_t port_key,
+ struct in6_addr ip)
+{
+struct buffered_packets *bp;
+
+uint32_t hash = keys_ip_hash(dp_key, port_key, );
+bp = buffered_packets_find(hmap, dp_key, port_key, , hash);
+if (!bp) {
+if (hmap_count(hmap) >= 1000) {
+return NULL;
+}
+
+bp = xmalloc(sizeof *bp);
+hmap_insert(hmap, >hmap_node, hash);
+bp->ip = ip;
+bp->dp_key = dp_key;
+bp->port_key = port_key;
+ovs_list_init(>queue);
+}
+
+bp->expire = time_msec() + OVN_BUFFERED_PACKETS_TIMEOUT;
+
+return bp;
+}
+

[ovs-dev] [PATCH ovn v4 2/3] controller: Add I-P node for recent MAC bindings

2023-03-28 Thread Ales Musil
Add node that will hold data about recently added
MAC bindings. This node will be used later on to prevent
race condition in packet buffering. the MAC binding is
kept in the node longer than the timeout of the buffered
packet to make sure that we don't miss it.

Signed-off-by: Ales Musil 
---
v4: Rebase on top of current main.
Split into three patches.
---
 controller/ovn-controller.c | 201 
 1 file changed, 201 insertions(+)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 76be2426e..d465f00e9 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -45,6 +45,7 @@
 #include "lib/vswitch-idl.h"
 #include "local_data.h"
 #include "lport.h"
+#include "mac-learn.h"
 #include "memory.h"
 #include "ofctrl.h"
 #include "ofctrl-seqno.h"
@@ -4303,6 +4304,185 @@ pflow_output_debug_handler(struct engine_node *node, 
void *data)
 return true;
 }
 
+static void
+recent_mac_bindings_add(struct hmap *recent_mbs,
+const struct sbrec_mac_binding *smb,
+const struct hmap *local_datapaths,
+struct ovsdb_idl_index *sbrec_port_binding_by_name)
+{
+ovs_be32 ip4;
+struct in6_addr ip;
+struct eth_addr mac;
+const struct sbrec_port_binding *pb;
+
+if (!get_local_datapath(local_datapaths, smb->datapath->tunnel_key)) {
+return;
+}
+
+pb = lport_lookup_by_name(sbrec_port_binding_by_name, smb->logical_port);
+if (!pb) {
+return;
+}
+
+if (!eth_addr_from_string(smb->mac, )) {
+return;
+}
+
+if (strchr(smb->ip, '.')) {
+if (!ip_parse(smb->ip, )) {
+return;
+}
+ip = in6_addr_mapped_ipv4(ip4);
+} else {
+if (!ipv6_parse(smb->ip, )) {
+return;
+}
+}
+
+/* Give the recent mac_binding entry bigger timeout than the buffered
+ * packet in case the MAC binding is created earlier. */
+ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key,
+, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT, false);
+
+const char *redirect_port =
+smap_get(>options, "chassis-redirect-port");
+if (!redirect_port) {
+return;
+}
+
+pb = lport_lookup_by_name(sbrec_port_binding_by_name, redirect_port);
+if (!pb || pb->datapath->tunnel_key != smb->datapath->tunnel_key ||
+strcmp(pb->type, "chassisredirect")) {
+return;
+}
+
+/* Add the same entry also for chassisredirect port as the buffered traffic
+ * might be buffered on the cr port. */
+ovn_mac_binding_add(recent_mbs, smb->datapath->tunnel_key, pb->tunnel_key,
+, mac, 2 * OVN_BUFFERED_PACKETS_TIMEOUT, false);
+}
+
+static void *
+en_recent_mac_bindings_init(struct engine_node *node OVS_UNUSED,
+struct engine_arg *arg OVS_UNUSED)
+{
+struct hmap *recent_mbs = xmalloc(sizeof *recent_mbs);
+hmap_init(recent_mbs);
+
+return recent_mbs;
+}
+
+static void
+en_recent_mac_bindings_cleanup(void *data)
+{
+struct hmap *recent_mbs = data;
+ovn_mac_bindings_destroy(recent_mbs);
+}
+
+static void
+en_recent_mac_bindings_clear_tracked_data(void *data)
+{
+long long now = time_msec();
+struct hmap *recent_mbs = data;
+
+struct mac_binding *mb;
+HMAP_FOR_EACH_SAFE (mb, hmap_node, recent_mbs) {
+if (ovn_mac_binding_is_expired(mb, now)) {
+ovn_mac_binding_remove(mb, recent_mbs);
+}
+}
+}
+
+static void
+en_recent_mac_bindings_run(struct engine_node *node, void *data)
+{
+struct hmap *recent_mbs = data;
+struct ed_type_runtime_data *rt_data =
+engine_get_input_data("runtime_data", node);
+const struct sbrec_mac_binding_table *mac_binding_table =
+EN_OVSDB_GET(engine_get_input("SB_mac_binding", node));
+struct ovsdb_idl_index *sbrec_port_binding_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_port_binding", node),
+"name");
+
+const struct sbrec_mac_binding *smb;
+SBREC_MAC_BINDING_TABLE_FOR_EACH (smb, mac_binding_table) {
+recent_mac_bindings_add(recent_mbs, smb, _data->local_datapaths,
+sbrec_port_binding_by_name);
+}
+}
+
+static bool
+recent_mac_bindings_mac_binding_handler(struct engine_node *node, void *data)
+{
+struct hmap *recent_mbs = data;
+struct ed_type_runtime_data *rt_data =
+engine_get_input_data("runtime_data", node);
+const struct sbrec_mac_binding_table *mac_binding_table =
+EN_OVSDB_GET(engine_get_input("SB_mac_binding", node));
+struct ovsdb_idl_index *sbrec_port_binding_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_port_binding", node),
+"name");
+
+const struct sbrec_mac_binding *smb;
+SBREC_MAC_BINDING_TABLE_FOR_EACH_TRACKED (smb, mac_binding_table) {
+/* 

[ovs-dev] [PATCH ovn v4 0/3] Prevent race in packet buffering

2023-03-28 Thread Ales Musil
There was a race within packet buffering that could
result in first packt being dropped. It could happen
under following conditions and topology:
S1 == R1 == public == R2 == S2
SNAT on R1 and DGP on port connecting R1 with public.

1) The GARP is sent for the GDP SNAT
2) The GARP is delayed on R2 because it's multicast
3) Some traffic that get's buffered on S2
4) An ARP is sent as consequence of the buffering
5) The delayed MAC binding is added to SB
6) Response for the ARP is ignored because the MAC binding
already exists
7) The buffered packet is never sent out and times out

In order to prevent this behavior add new node that will
keep track of all recently changed MAC bindings. Those
recently changed MAC bindings are kept around for a longer
time than the buffered packets which should ensure that we
can find them even if they are created before the packet
is actually buffered.

At the same time simplify the packet buffering process
and move it to mac-learn module.

Ales Musil (3):
  pinctrl: Simplify packet buffering
  controller: Add I-P node for recent MAC bindings
  controller: Prevent race in packet buffering

 controller/mac-learn.c  | 171 --
 controller/mac-learn.h  |  50 +-
 controller/ovn-controller.c | 208 +-
 controller/pinctrl.c| 336 
 controller/pinctrl.h|   5 +-
 5 files changed, 483 insertions(+), 287 deletions(-)

-- 
2.39.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 5/5] odp: Add SRv6 tunnel actions.

2023-03-28 Thread Nobuhiro MIKI
This patch adds ODP actions for SRv6 and its tests.

Signed-off-by: Nobuhiro MIKI 
---
 lib/odp-util.c| 70 +++
 python/ovs/flow/odp.py|  8 
 python/ovs/tests/test_odp.py  | 16 
 tests/odp.at  | 12 +-
 tests/tunnel-push-pop-ipv6.at | 23 
 5 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/lib/odp-util.c b/lib/odp-util.c
index dbd4554d0626..2ec889c417e5 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -715,6 +715,24 @@ format_odp_tnl_push_header(struct ds *ds, struct 
ovs_action_push_tnl *data)
 }
 
 ds_put_char(ds, ')');
+} else if (data->tnl_type == OVS_VPORT_TYPE_SRV6) {
+const struct srv6_base_hdr *srh;
+struct in6_addr *segs;
+int nr_segs;
+int i;
+
+srh = (const struct srv6_base_hdr *) l4;
+segs = ALIGNED_CAST(struct in6_addr *, srh + 1);
+nr_segs = srh->last_entry + 1;
+
+ds_put_format(ds, "srv6(");
+ds_put_format(ds, "segments_left=%d", srh->rt_hdr.segments_left);
+ds_put_format(ds, ",segs(");
+for (i = 0; i < nr_segs; i++) {
+ds_put_format(ds, i > 0 ? "," : "");
+ipv6_format_addr([nr_segs - i - 1], ds);
+}
+ds_put_format(ds, "))");
 } else if (data->tnl_type == OVS_VPORT_TYPE_GRE ||
data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {
 const struct gre_base_hdr *greh;
@@ -1534,6 +1552,7 @@ ovs_parse_tnl_push(const char *s, struct 
ovs_action_push_tnl *data)
 uint8_t hwid, dir;
 uint32_t teid;
 uint8_t gtpu_flags, gtpu_msgtype;
+uint8_t segments_left;
 
 if (!ovs_scan_len(s, , "tnl_push(tnl_port(%"SCNi32"),", 
>tnl_port)) {
 return -EINVAL;
@@ -1775,6 +1794,57 @@ ovs_parse_tnl_push(const char *s, struct 
ovs_action_push_tnl *data)
 tnl_type = OVS_VPORT_TYPE_GTPU;
 header_len = sizeof *eth + ip_len +
  sizeof *udp + sizeof *gtph;
+} else if (ovs_scan_len(s, , "srv6(segments_left=%"SCNu8,
+_left)) {
+struct srv6_base_hdr *srh = (struct srv6_base_hdr *) (ip6 + 1);
+char seg_s[IPV6_SCAN_LEN + 1];
+struct in6_addr *segs;
+struct in6_addr seg;
+uint8_t n_segs = 0;
+
+if (segments_left + 1 > SRV6_MAX_SEGS) {
+return -EINVAL;
+}
+
+ip6->ip6_nxt = IPPROTO_ROUTING;
+
+srh->rt_hdr.hdrlen = 2 * (segments_left + 1);
+srh->rt_hdr.segments_left = segments_left;
+srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
+srh->last_entry = segments_left;
+
+tnl_type = OVS_VPORT_TYPE_SRV6;
+header_len = sizeof *eth + ip_len +
+ sizeof *srh + 8 * srh->rt_hdr.hdrlen;
+/* Parse segment list. */
+if (!ovs_scan_len(s, , ",segs(")) {
+return -EINVAL;
+}
+
+segs = ALIGNED_CAST(struct in6_addr *, srh + 1);
+segs += segments_left;
+
+while (ovs_scan_len(s, , IPV6_SCAN_FMT, seg_s)
+   && inet_pton(AF_INET6, seg_s, ) == 1) {
+if (n_segs == segments_left + 1) {
+return -EINVAL;
+}
+
+memcpy(segs--, , sizeof *segs);
+n_segs++;
+
+if (s[n] == ',') {
+n++;
+}
+}
+
+if (!ovs_scan_len(s, , ")))")) {
+return -EINVAL;
+}
+
+if (n_segs != segments_left + 1) {
+return -EINVAL;
+}
 } else {
 return -EINVAL;
 }
diff --git a/python/ovs/flow/odp.py b/python/ovs/flow/odp.py
index db63afc8d64d..88aee17fb2a4 100644
--- a/python/ovs/flow/odp.py
+++ b/python/ovs/flow/odp.py
@@ -474,6 +474,14 @@ class ODPFlow(Flow):
 }
 )
 ),
+"srv6": nested_kv_decoder(
+KVDecoders(
+{
+"segments_left": decode_int,
+"segs": decode_default,
+}
+)
+),
 }
 )
 ),
diff --git a/python/ovs/tests/test_odp.py b/python/ovs/tests/test_odp.py
index f8017ca8a169..a50d3185cc62 100644
--- a/python/ovs/tests/test_odp.py
+++ b/python/ovs/tests/test_odp.py
@@ -452,6 +452,22 @@ def test_odp_fields(input_string, expected):
 ),
 ],
 ),
+(
+
"actions:tnl_push(header(srv6(segments_left=1,segs(2001:cafe::90,2001:cafe::91",
  # noqa: E501
+[
+KeyValue(
+"tnl_push",
+{
+ 

[ovs-dev] [PATCH v11 4/5] userspace: Add SRv6 tunnel support.

2023-03-28 Thread Nobuhiro MIKI
SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulation and decapsulation the inner packets with
IPv6 header and an extended header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI 
---
 Documentation/faq/configuration.rst |  21 +
 Documentation/faq/releases.rst  |   1 +
 NEWS|   2 +
 include/linux/openvswitch.h |   1 +
 include/sparse/netinet/in.h |   1 +
 lib/dpif-netlink-rtnl.c |   5 ++
 lib/dpif-netlink.c  |   5 ++
 lib/netdev-native-tnl.c | 127 
 lib/netdev-native-tnl.h |  10 +++
 lib/netdev-vport.c  |  53 
 lib/netdev.h|   4 +
 lib/packets.h   |  11 +++
 lib/tnl-ports.c |   5 +-
 ofproto/ofproto-dpif-xlate.c|   3 +
 tests/system-kmod-macros.at |   8 ++
 tests/system-traffic.at | 119 ++
 tests/system-userspace-macros.at|   6 ++
 tests/tunnel.at |  56 
 18 files changed, 437 insertions(+), 1 deletion(-)

diff --git a/Documentation/faq/configuration.rst 
b/Documentation/faq/configuration.rst
index dc6c92446f98..4df390dc2d9d 100644
--- a/Documentation/faq/configuration.rst
+++ b/Documentation/faq/configuration.rst
@@ -238,6 +238,27 @@ Q: Does Open vSwitch support GTP-U?
 set int gtpu0 type=gtpu options:key= \
 options:remote_ip=172.31.1.1
 
+Q: Does Open vSwitch support SRv6?
+
+A: Yes. Starting with version 3.2, the Open vSwitch userspace
+datapath supports SRv6 (Segment Routing over IPv6). The following
+example shows tunneling to fc00:300::1 via fc00:100::1 and fc00:200::1.
+In the current implementation, if "IPv6 in IPv6" or "IPv4 in IPv6" packets
+are routed to this interface, and these packets are not SRv6 packets, they
+may be dropped, so be careful in workloads with a mix of these tunnels.
+Also note the following restrictions:
+
+* Segment list length is limited to 6.
+* SRv6 packets with other than segments_left = 0 are simply dropped.
+
+::
+
+$ ovs-vsctl add-br br0
+$ ovs-vsctl add-port br0 srv6_0 -- \
+set int srv6_0 type=srv6  \
+options:remote_ip=fc00:100::1 \
+options:srv6_segs="fc00:100::1,fc00:200::1,fc00:300::1"
+
 Q: How do I connect two bridges?
 
 A: First, why do you want to do this?  Two connected bridges are not much
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index 9e1b42262000..9fb679e307d9 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -151,6 +151,7 @@ Q: Are all features available with all datapaths?
 Tunnel - ERSPAN 4.18   2.10 2.10 NO
 Tunnel - ERSPAN-IPv64.18   2.10 2.10 NO
 Tunnel - GTP-U  NO NO   2.14 NO
+Tunnel - SRv6   NO NO   3.2  NO
 Tunnel - Bareudp5.7NO   NO   NO
 QoS - Policing  YES1.1  2.6  NO
 QoS - Shaping   YES1.1  NO   NO
diff --git a/NEWS b/NEWS
index 8771ee618aed..a8d77a75a248 100644
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,8 @@ Post-v3.1.0
  * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
with the --hw-rawio-access command line option.  This allows the
process extra privileges when mapping physical interconnect memory.
+   - SRv6 Tunnel Protocol
+ * Only support for userspace datapath.
 
 
 v3.1.0 - 16 Feb 2023
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index bc8f74991849..e305c331516b 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -254,6 +254,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_IP6GRE = 109,
OVS_VPORT_TYPE_GTPU = 110,
OVS_VPORT_TYPE_BAREUDP = 111,  /* Bareudp tunnel. */
+   OVS_VPORT_TYPE_SRV6 = 112,  /* SRv6 tunnel. */
__OVS_VPORT_TYPE_MAX
 };
 
diff --git a/include/sparse/netinet/in.h b/include/sparse/netinet/in.h
index 21deceb28d41..00927281643a 100644
--- a/include/sparse/netinet/in.h
+++ b/include/sparse/netinet/in.h
@@ -68,6 +68,7 @@ struct sockaddr_in6 {
 #define IPPROTO_HOPOPTS 0
 #define IPPROTO_ICMP 1
 #define IPPROTO_IGMP 2
+#define IPPROTO_IPIP 4
 #define IPPROTO_TCP 6
 #define IPPROTO_UDP 17
 #define IPPROTO_ROUTING 43
diff --git a/lib/dpif-netlink-rtnl.c 

[ovs-dev] [PATCH v11 3/5] flow: Support rt_hdr in parse_ipv6_ext_hdrs().

2023-03-28 Thread Nobuhiro MIKI
Checks whether IPPROTO_ROUTING exists in the IPv6 extension headers.
If it exists, the first address is retrieved.

If NULL is specified for "frag_hdr" and/or "rt_hdr", those addresses in
the header are not reported to the caller. Of course, "frag_hdr" and
"rt_hdr" are properly parsed inside this function.

Signed-off-by: Nobuhiro MIKI 
---
 lib/conntrack.c |  4 ++--
 lib/flow.c  | 48 +---
 lib/flow.h  |  3 ++-
 lib/ipf.c   | 15 ---
 lib/packets.h   |  9 +
 5 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 8cf7779c6703..f86fa26f466d 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1617,8 +1617,8 @@ extract_l3_ipv6(struct conn_key *key, const void *data, 
size_t size,
 uint8_t nw_proto = ip6->ip6_nxt;
 uint8_t nw_frag = 0;
 
-const struct ovs_16aligned_ip6_frag *frag_hdr;
-if (!parse_ipv6_ext_hdrs(, , _proto, _frag, _hdr)) {
+if (!parse_ipv6_ext_hdrs(, , _proto, _frag,
+ NULL, NULL)) {
 return false;
 }
 
diff --git a/lib/flow.c b/lib/flow.c
index c3a3aa3ce45d..9501a259e9d4 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -479,9 +479,17 @@ invalid:
 static inline bool
 parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, uint8_t *nw_proto,
   uint8_t *nw_frag,
-  const struct ovs_16aligned_ip6_frag **frag_hdr)
+  const struct ovs_16aligned_ip6_frag **frag_hdr,
+  const struct ip6_rt_hdr **rt_hdr)
 {
-*frag_hdr = NULL;
+if (frag_hdr) {
+*frag_hdr = NULL;
+}
+
+if (rt_hdr) {
+*rt_hdr = NULL;
+}
+
 while (1) {
 if (OVS_LIKELY((*nw_proto != IPPROTO_HOPOPTS)
&& (*nw_proto != IPPROTO_ROUTING)
@@ -504,7 +512,6 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, 
uint8_t *nw_proto,
 }
 
 if ((*nw_proto == IPPROTO_HOPOPTS)
-|| (*nw_proto == IPPROTO_ROUTING)
 || (*nw_proto == IPPROTO_DSTOPTS)) {
 /* These headers, while different, have the fields we care
  * about in the same location and with the same
@@ -515,6 +522,18 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, 
uint8_t *nw_proto,
 (ext_hdr->ip6e_len + 1) * 8))) {
 return false;
 }
+} else if (*nw_proto == IPPROTO_ROUTING) {
+const struct ip6_rt_hdr *tmp;
+if (!rt_hdr) {
+rt_hdr = 
+}
+
+*rt_hdr = *datap;
+*nw_proto = (*rt_hdr)->nexthdr;
+if (OVS_UNLIKELY(!data_try_pull(datap, sizep,
+((*rt_hdr)->hdrlen + 1) * 8))) {
+return false;
+}
 } else if (*nw_proto == IPPROTO_AH) {
 /* A standard AH definition isn't available, but the fields
  * we care about are in the same location as the generic
@@ -527,6 +546,11 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, 
uint8_t *nw_proto,
 return false;
 }
 } else if (*nw_proto == IPPROTO_FRAGMENT) {
+const struct ovs_16aligned_ip6_frag *tmp;
+if (!frag_hdr) {
+frag_hdr = 
+}
+
 *frag_hdr = *datap;
 
 *nw_proto = (*frag_hdr)->ip6f_nxt;
@@ -561,15 +585,19 @@ parse_ipv6_ext_hdrs__(const void **datap, size_t *sizep, 
uint8_t *nw_proto,
  * has FLOW_NW_FRAG_LATER set.  Both first and later fragments have
  * FLOW_NW_FRAG_ANY set in 'nw_frag'.
  *
+ * If a routing header is found, '*rt_hdr' is set to the routing
+ * header and otherwise set to NULL.
+ *
  * A return value of false indicates that there was a problem parsing
  * the extension headers.*/
 bool
 parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
 uint8_t *nw_frag,
-const struct ovs_16aligned_ip6_frag **frag_hdr)
+const struct ovs_16aligned_ip6_frag **frag_hdr,
+const struct ip6_rt_hdr **rt_hdr)
 {
 return parse_ipv6_ext_hdrs__(datap, sizep, nw_proto, nw_frag,
- frag_hdr);
+ frag_hdr, rt_hdr);
 }
 
 bool
@@ -945,9 +973,8 @@ miniflow_extract(struct dp_packet *packet, struct miniflow 
*dst)
 nw_ttl = nh->ip6_hlim;
 nw_proto = nh->ip6_nxt;
 
-const struct ovs_16aligned_ip6_frag *frag_hdr;
-if (!parse_ipv6_ext_hdrs__(, , _proto, _frag,
-   _hdr)) {
+if (!parse_ipv6_ext_hdrs(, , _proto, _frag,
+ NULL, NULL)) {
 goto out;
 }
 
@@ -1200,10 +1227,9 @@ parse_tcp_flags(struct dp_packet *packet,
 plen = ntohs(nh->ip6_plen); /* Never pull padding. */
 

[ovs-dev] [PATCH v11 0/5] userspace: Add SRv6 tunnel support.

2023-03-28 Thread Nobuhiro MIKI
v11:
* Fix comments.
* Clean up conditional statements.
  ("!rt_hdr || rt_hdr->type != IPV6_SRCRT_TYPE_4").
* Remove variables from function prototype.
* Define IPPROTO_IPIP for sparse.
v10:
* Clean up tnl_type_to_nw_proto().
* Support frag_hdr=NULL and/or rt_hdr=NULL in parse_ipv6_ext_hdrs().
* Clean up srv6_build_header() to use netdev_tnl_ip_build_header().
* Fix to avoid using sizeof() style.
* Add validation that checks segs[0] == params->flow->tunnel.ipv6_dst.
* Add more tests for tests/odp.at.
* Enhance validation for external input to prevent over-writing
v9:
* Fix compile warnings
v8:
* Split the patch into multiple patches.
* Fix docs and NEWS to point to version 3.2.
* Move tests from tests/system-userspace-traffic.at
  to tests/system-traffic.at.
v7:
* fix flake8 error
v6:
* add tests that show interoperability between OVS and native kernel's
  implementation in tests/system-userspace-traffic.at.
* fix the documentation.
* add validation in routing header by parse_ipv6_ext_hdrs.
* add parsing implementation and test in tests/odp.at,
  python/ovs/flow/odp.py and python/ovs/tests/test_odp.py.
* fix coding style.
* add build-time assertion on the structure size.
v5:
* rebased on latest master
v4:
* fix alignment on cast
v3:
* fix alignment on cast
v2:
* fix pointer arithmetic

Nobuhiro MIKI (5):
  tests: Define new ADD_VETH_NS macro.
  tnl-ports: Support multiple nw_protos.
  flow: Support rt_hdr in parse_ipv6_ext_hdrs().
  userspace: Add SRv6 tunnel support.
  odp: Add SRv6 tunnel actions.

 Documentation/faq/configuration.rst |  21 +
 Documentation/faq/releases.rst  |   1 +
 NEWS|   2 +
 include/linux/openvswitch.h |   1 +
 include/sparse/netinet/in.h |   1 +
 lib/conntrack.c |   4 +-
 lib/dpif-netlink-rtnl.c |   5 ++
 lib/dpif-netlink.c  |   5 ++
 lib/flow.c  |  48 ---
 lib/flow.h  |   3 +-
 lib/ipf.c   |  15 ++--
 lib/netdev-native-tnl.c | 127 
 lib/netdev-native-tnl.h |  10 +++
 lib/netdev-vport.c  |  53 
 lib/netdev.h|   4 +
 lib/odp-util.c  |  70 +++
 lib/packets.h   |  20 +
 lib/tnl-ports.c |  85 +++
 ofproto/ofproto-dpif-xlate.c|   3 +
 python/ovs/flow/odp.py  |   8 ++
 python/ovs/tests/test_odp.py|  16 
 tests/odp.at|  12 ++-
 tests/system-common-macros.at   |  16 
 tests/system-kmod-macros.at |   8 ++
 tests/system-traffic.at | 119 ++
 tests/system-userspace-macros.at|   6 ++
 tests/tunnel-push-pop-ipv6.at   |  23 +
 tests/tunnel.at |  56 
 28 files changed, 686 insertions(+), 56 deletions(-)

-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 1/5] tests: Define new ADD_VETH_NS macro.

2023-03-28 Thread Nobuhiro MIKI
The new ADD_VETH_NS macro creates two netns and connects them
with a veth pair. We can use it for testing in a generic purpose.

e.g.
ADD_VETH_NS([ns1], [p1], [1.1.1.1/24], [ns2], [p2], [1.1.1.2/24])

Signed-off-by: Nobuhiro MIKI 
---
 tests/system-common-macros.at | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
index 8b9f5c75254f..0077a8609c02 100644
--- a/tests/system-common-macros.at
+++ b/tests/system-common-macros.at
@@ -126,6 +126,22 @@ m4_define([ADD_VETH_BOND],
 ]
 )
 
+# ADD_VETH_NS([ns1], [port1], [ip_addr1], [ns2], [port2], [ip_addr2])
+#
+# Add a pair of veth ports in 'ns1' and 'ns2'. The port names are 'port1'
+# and 'port2' respectively, and the IP addresses 'ip_addr1' and 'ip_addr2'
+# are assigned to each port.
+m4_define([ADD_VETH_NS],
+[ AT_CHECK([ip link add $2 type veth peer name $5]),
+  AT_CHECK([ip link set $2 netns $1])
+  AT_CHECK([ip link set $5 netns $4])
+  NS_CHECK_EXEC([$1], [ip link set $2 up])
+  NS_CHECK_EXEC([$4], [ip link set $5 up])
+  NS_CHECK_EXEC([$1], [ip addr add $3 dev $2])
+  NS_CHECK_EXEC([$4], [ip addr add $6 dev $5])
+]
+)
+
 # ADD_VLAN([port], [namespace], [vlan-id], [ip-addr])
 #
 # Add a VLAN device named 'port' within 'namespace'. It will be configured
-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 2/5] tnl-ports: Support multiple nw_protos.

2023-03-28 Thread Nobuhiro MIKI
In some tunnels, inner packet needs to support both IPv4
and IPv6. Therefore, this patch improves to allow two
protocols to be tied together in one tunneling.

Signed-off-by: Nobuhiro MIKI 
---
 lib/tnl-ports.c | 80 +
 1 file changed, 48 insertions(+), 32 deletions(-)

diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c
index 050eafa6b8c3..829457ee50f0 100644
--- a/lib/tnl-ports.c
+++ b/lib/tnl-ports.c
@@ -161,40 +161,28 @@ map_insert_ipdev__(struct ip_device *ip_dev, char 
dev_name[],
 }
 }
 
-static uint8_t
-tnl_type_to_nw_proto(const char type[])
+static void
+tnl_type_to_nw_proto(const char type[], uint8_t nw_protos[2])
 {
-if (!strcmp(type, "geneve")) {
-return IPPROTO_UDP;
+nw_protos[0] = nw_protos[1] = 0;
+
+if (!strcmp(type, "geneve") || !strcmp(type, "vxlan") ||
+!strcmp(type, "gtpu")) {
+nw_protos[0] = IPPROTO_UDP;
+} else if (!strcmp(type, "stt")) {
+nw_protos[0] = IPPROTO_TCP;
+} else if (!strcmp(type, "gre") || !strcmp(type, "erspan") ||
+   !strcmp(type, "ip6erspan") || !strcmp(type, "ip6gre")) {
+nw_protos[0] = IPPROTO_GRE;
 }
-if (!strcmp(type, "stt")) {
-return IPPROTO_TCP;
-}
-if (!strcmp(type, "gre") || !strcmp(type, "erspan") ||
-!strcmp(type, "ip6erspan") || !strcmp(type, "ip6gre")) {
-return IPPROTO_GRE;
-}
-if (!strcmp(type, "vxlan")) {
-return IPPROTO_UDP;
-}
-if (!strcmp(type, "gtpu")) {
-return IPPROTO_UDP;
-}
-return 0;
 }
 
-void
-tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port,
-const char dev_name[], const char type[])
+static void
+tnl_port_map_insert__(odp_port_t port, ovs_be16 tp_port,
+  const char dev_name[], uint8_t nw_proto)
 {
 struct tnl_port *p;
 struct ip_device *ip_dev;
-uint8_t nw_proto;
-
-nw_proto = tnl_type_to_nw_proto(type);
-if (!nw_proto) {
-return;
-}
 
 ovs_mutex_lock();
 LIST_FOR_EACH(p, node, _list) {
@@ -220,6 +208,22 @@ out:
 ovs_mutex_unlock();
 }
 
+void
+tnl_port_map_insert(odp_port_t port, ovs_be16 tp_port,
+const char dev_name[], const char type[])
+{
+uint8_t nw_protos[2];
+int i;
+
+tnl_type_to_nw_proto(type, nw_protos);
+
+for (i = 0; i < 2; i++) {
+if (nw_protos[i]) {
+tnl_port_map_insert__(port, tp_port, dev_name, nw_protos[i]);
+}
+}
+}
+
 static void
 tnl_port_unref(const struct cls_rule *cr)
 {
@@ -256,14 +260,11 @@ ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 
tp_port, uint8_t nw_proto)
 }
 }
 
-void
-tnl_port_map_delete(odp_port_t port, const char type[])
+static void
+tnl_port_map_delete__(odp_port_t port, uint8_t nw_proto)
 {
 struct tnl_port *p;
 struct ip_device *ip_dev;
-uint8_t nw_proto;
-
-nw_proto = tnl_type_to_nw_proto(type);
 
 ovs_mutex_lock();
 LIST_FOR_EACH_SAFE (p, node, _list) {
@@ -280,6 +281,21 @@ tnl_port_map_delete(odp_port_t port, const char type[])
 ovs_mutex_unlock();
 }
 
+void
+tnl_port_map_delete(odp_port_t port, const char type[])
+{
+uint8_t nw_protos[2];
+int i;
+
+tnl_type_to_nw_proto(type, nw_protos);
+
+for (i = 0; i < 2; i++) {
+if (nw_protos[i]) {
+tnl_port_map_delete__(port, nw_protos[i]);
+}
+}
+}
+
 /* 'flow' is non-const to allow for temporary modifications during the lookup.
  * Any changes are restored before returning. */
 odp_port_t
-- 
2.31.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev