Commit 4deac4509abbedd6ffaecf27eed01ddefccea40a introduced functionality
in ovn-northd to use a single zone for SNAT and DNAT when possible. It
accounts for certain situations, such as hairpinned traffic, where we
still need a separate SNAT and DNAT zone.

However, one situation it did not account for was when traffic traverses
a logical router and is DNATted as a result of a load balancer, then
when the traffic egresses the router, it needs to be SNATted. In this
situation, we try to use the same CT zone for the SNAT as for the load
balancer DNAT, which does not work.

This commit fixes the issue by installing a flow in the OUT_SNAT stage
of the logical router egress pipeline for each SNAT. This flow will fall
back to using a separate CT zone for SNAT if the ct_label.natted flag is
set.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2044127

Signed-off-by: Mark Michelson <[email protected]>
---
Note: The "SNAT in separate zone from DNAT" test in system-ovn.at can
fail on some systems, and at this point it's not clear what causes it.
On my development system running Fedora 33 and kernel 5.14.18-100, the
test fails because the pings fail. In a container running Fedora 33 and
kernel 5.10.19-200, the test succeeds. It's unknown if it's the kernel
version or some other setting that has a bearing on the success or
failure of the test. The OpenFlow that ovn-controller generates is
identical on both successful and failure scenarios.

If this inconsistency causes issues with CI, then this patch may need to
be re-examined to determine if there are other parameters that may be
needed to correct the referenced issue.
---
 northd/northd.c     |  53 ++++++++++++++------
 tests/ovn-northd.at |  36 ++++++++++++++
 tests/system-ovn.at | 117 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 190 insertions(+), 16 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index b264fb850..866a81310 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -12961,23 +12961,44 @@ build_lrouter_out_snat_flow(struct hmap *lflows, 
struct ovn_datapath *od,
                                 priority, ds_cstr(match),
                                 ds_cstr(actions), &nat->header_);
 
-        if (!stateless) {
-            ds_put_cstr(match, " && "REGBIT_DST_NAT_IP_LOCAL" == 1");
-            ds_clear(actions);
-            if (distributed) {
-                ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
-                              ETH_ADDR_ARGS(mac));
-            }
-            ds_put_format(actions,  REGBIT_DST_NAT_IP_LOCAL" = 0; ct_snat(%s",
-                          nat->external_ip);
-            if (nat->external_port_range[0]) {
-                ds_put_format(actions, ",%s", nat->external_port_range);
-            }
-            ds_put_format(actions, ");");
-            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
-                                    priority + 1, ds_cstr(match),
-                                    ds_cstr(actions), &nat->header_);
+        if (stateless) {
+            return;
+        }
+
+        struct ds match_natted = DS_EMPTY_INITIALIZER;
+        struct ds actions_natted = DS_EMPTY_INITIALIZER;
+
+        ds_clone(&match_natted, match);
+
+        ds_clear(actions);
+
+        if (distributed) {
+            ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
+                          ETH_ADDR_ARGS(mac));
         }
+
+        ds_clone(&actions_natted, actions);
+
+        ds_put_format(&actions_natted, "ct_snat(%s", nat->external_ip);
+        if (nat->external_port_range[0]) {
+            ds_put_format(&actions_natted, ",%s", nat->external_port_range);
+        }
+        ds_put_format(&actions_natted, ");");
+
+        ds_put_cstr(&match_natted, " && ct_label.natted == 1");
+        ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+                                priority + 2, ds_cstr(&match_natted),
+                                ds_cstr(&actions_natted), &nat->header_);
+
+        ds_put_cstr(match, " && "REGBIT_DST_NAT_IP_LOCAL" == 1");
+        ds_put_format(actions,  REGBIT_DST_NAT_IP_LOCAL" = 0; %s",
+                      ds_cstr(&actions_natted));
+        ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+                                priority + 1, ds_cstr(match),
+                                ds_cstr(actions), &nat->header_);
+
+        ds_destroy(&match_natted);
+        ds_destroy(&actions_natted);
     }
 }
 
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 60d91a771..d9657bcb8 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -5974,6 +5974,42 @@ AT_CHECK([grep -e "(lr_in_ip_routing   ).*outport" 
lr0flows | sed 's/table=../ta
   table=??(lr_in_ip_routing   ), priority=97   , match=(reg7 == 2 && ip4.dst 
== 1.1.1.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.20; reg1 
= 192.168.0.1; eth.src = 00:00:00:00:00:01; outport = "lrp0"; flags.loopback = 
1; next;)
 ])
 
+AT_CLEANUP
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([nat -- SNAT flows for DNAT traffic])
+# This test makes sure that SNAT flows are added that ensure a unique
+# SNAT zone is used for traffic that is natted. We don't need an elaborate
+# setup just to ensure the flow is present. We just need to create the router
+# and add an SNAT to it.
+
+ovn_start
+
+check ovn-nbctl lr-add r0
+check ovn-nbctl lrp-add r0 r0p1 00:00:00:00:00:01 10.0.0.1
+check ovn-nbctl lrp-set-gateway-chassis r0p1 hv1
+check ovn-nbctl lr-nat-add r0 snat 10.0.0.1 192.168.0.1
+check ovn-nbctl --wait=sb sync
+
+ovn-sbctl lflow-list r0 | grep lr_out_snat | sed 's/table=../table=??/' > 
snat_flows
+
+# We expect three SNAT flows for the NAT we created.
+# Priority-161 flow that uses CT zone stored in reg11
+# Priority-162 flow that uses CT zone stored in reg12 if local dnat is detected
+# Priority-163 flow that uses CT zone stored in reg12 if traffic is natted
+# Since priorities are unique, we can grep based on that and check the 
resulting
+# flows are as expected.
+
+AT_CHECK([grep priority=163 snat_flows], [0], [dnl
+  table=??(lr_out_snat        ), priority=163  , match=(ip && ip4.src == 
192.168.0.1 && outport == "r0p1" && is_chassis_resident("cr-r0p1") && 
ct_label.natted == 1), action=(ct_snat(10.0.0.1);)
+])
+AT_CHECK([grep priority=162 snat_flows], [0], [dnl
+  table=??(lr_out_snat        ), priority=162  , match=(ip && ip4.src == 
192.168.0.1 && outport == "r0p1" && is_chassis_resident("cr-r0p1") && reg9[[4]] 
== 1), action=(reg9[[4]] = 0; ct_snat(10.0.0.1);)
+])
+AT_CHECK([grep priority=161 snat_flows], [0], [dnl
+  table=??(lr_out_snat        ), priority=161  , match=(ip && ip4.src == 
192.168.0.1 && outport == "r0p1" && is_chassis_resident("cr-r0p1")), 
action=(ct_snat_in_czone(10.0.0.1);)
+])
+
 AT_CLEANUP
 ])
 
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index d4c22e7e9..0b29becc8 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -7975,3 +7975,120 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
patch-.*/d
 
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([SNAT in separate zone from DNAT])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# The goal of this test is to ensure that when traffic is first DNATted
+# (by way of a load balancer), and then SNATted, the SNAT happens in a
+# separate conntrack zone from the DNAT.
+
+start_daemon ovn-controller
+
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lr-add r1
+check ovn-nbctl lrp-add r1 r1_public 00:de:ad:ff:00:01 172.16.0.1/16
+check ovn-nbctl lrp-add r1 r1_s1 00:de:ad:fe:00:01 173.0.1.1/24
+check ovn-nbctl lrp-set-gateway-chassis r1_public hv1
+
+check ovn-nbctl lb-add r1_lb 30.0.0.1 172.16.0.102
+check ovn-nbctl lr-lb-add r1 r1_lb
+
+check ovn-nbctl ls-add s1
+check ovn-nbctl lsp-add s1 s1_r1
+check ovn-nbctl lsp-set-type s1_r1 router
+check ovn-nbctl lsp-set-addresses s1_r1 router
+check ovn-nbctl lsp-set-options s1_r1 router-port=r1_s1
+
+check ovn-nbctl lsp-add s1 vm1
+check ovn-nbctl lsp-set-addresses vm1 "00:de:ad:01:00:01 173.0.1.2"
+
+check ovn-nbctl lsp-add public public_r1
+check ovn-nbctl lsp-set-type public_r1 router
+check ovn-nbctl lsp-set-addresses public_r1 router
+check ovn-nbctl lsp-set-options public_r1 router-port=r1_public 
nat-addresses=router
+
+check ovn-nbctl lr-add r2
+check ovn-nbctl lrp-add r2 r2_public 00:de:ad:ff:00:02 172.16.0.2/16
+check ovn-nbctl lrp-add r2 r2_s2 00:de:ad:fe:00:02 173.0.2.1/24
+check ovn-nbctl lr-nat-add r2 dnat_and_snat 172.16.0.102 173.0.2.2
+check ovn-nbctl lrp-set-gateway-chassis r2_public hv1
+
+check ovn-nbctl ls-add s2
+check ovn-nbctl lsp-add s2 s2_r2
+check ovn-nbctl lsp-set-type s2_r2 router
+check ovn-nbctl lsp-set-addresses s2_r2 router
+check ovn-nbctl lsp-set-options s2_r2 router-port=r2_s2
+
+check ovn-nbctl lsp-add s2 vm2
+check ovn-nbctl lsp-set-addresses vm2 "00:de:ad:01:00:02 173.0.2.2"
+
+check ovn-nbctl lsp-add public public_r2
+check ovn-nbctl lsp-set-type public_r2 router
+check ovn-nbctl lsp-set-addresses public_r2 router
+check ovn-nbctl lsp-set-options public_r2 router-port=r2_public 
nat-addresses=router
+
+ADD_NAMESPACES(vm1)
+ADD_VETH(vm1, vm1, br-int, "173.0.1.2/24", "00:de:ad:01:00:01", \
+         "173.0.1.1")
+ADD_NAMESPACES(vm2)
+ADD_VETH(vm2, vm2, br-int, "173.0.2.2/24", "00:de:ad:01:00:02", \
+         "173.0.2.1")
+
+check ovn-nbctl lr-nat-add r1 dnat_and_snat 172.16.0.101 173.0.1.2 vm1 
00:00:00:01:02:03
+check ovn-nbctl --wait=hv sync
+
+# Next, make sure that a ping works as expected
+NS_CHECK_EXEC([vm1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.1 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Finally, make sure that conntrack shows two separate zones being used for
+# DNAT and SNAT
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=173.0.1.2,dst=30.0.0.1,id=<cleared>,type=8,code=0),reply=(src=172.16.0.102,dst=173.0.1.2,id=<cleared>,type=0,code=0),zone=<cleared>,labels=0x2
+])
+
+# The final two entries appear identical here. That is because FORMAT_CT
+# scrubs the zone numbers. In actuality, the zone numbers are different,
+# which is why there are two entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.102) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.0.101,dst=172.16.0.102,id=<cleared>,type=8,code=0),reply=(src=173.0.2.2,dst=172.16.0.101,id=<cleared>,type=0,code=0),zone=<cleared>
+icmp,orig=(src=173.0.1.2,dst=172.16.0.102,id=<cleared>,type=8,code=0),reply=(src=172.16.0.102,dst=172.16.0.101,id=<cleared>,type=0,code=0),zone=<cleared>
+icmp,orig=(src=173.0.1.2,dst=172.16.0.102,id=<cleared>,type=8,code=0),reply=(src=172.16.0.102,dst=172.16.0.101,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
-- 
2.31.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to