Commit 4deac4509abbedd6ffaecf27eed01ddefccea40a introduced functionality
in ovn-northd to use a single zone for SNAT and DNAT when possible. It
accounts for certain situations, such as hairpinned traffic, where
separate SNAT and DNAT zones are still needed.

However, one situation it did not account for is traffic that traverses
a logical router and is DNATted by a load balancer, then needs to be
SNATted when it egresses the router. In this situation, we try to use
the same CT zone for the SNAT as for the load balancer DNAT, which does
not work.
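
For illustration, the combination that triggers this is a router with
both a load balancer and a NAT rule configured, e.g. (these names mirror
the system test added below):

    ovn-nbctl lb-add r1_lb 30.0.0.1 172.16.0.102
    ovn-nbctl lr-lb-add r1 r1_lb
    ovn-nbctl lr-nat-add r1 dnat_and_snat 172.16.0.101 173.0.1.2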

This commit fixes the issue by setting the DNAT local bit
(REGBIT_DST_NAT_IP_LOCAL) in the CHK_DNAT_LOCAL stage at the start of
the egress pipeline if the packet was DNATted during the ingress
pipeline. This ensures that when the SNAT stage is reached, a separate
CT zone is used for SNAT.
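
Concretely, northd now adds a flow like the following in the
lr_out_chk_dnat_local stage (this is the flow the updated ovn-northd.at
expectations check for; the table number varies):

    table=? (lr_out_chk_dnat_local), priority=50, match=(ip && ct_label.natted == 1), action=(reg9[4] = 1; next;)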

Signed-off-by: Mark Michelson <[email protected]>
---
v1 -> v2
* Switched from setting flows in the SNAT stage to using the
  CHK_DNAT_LOCAL stage, as suggested by Numan. Updated the commit
  message to indicate this.
* Removed ovn-northd test in favor of moving the checks to the existing
  NAT configuration check test. This also resulted in fixing these
  tests, since they were failing with my first patch applied.
* Added new flow documentation to ovn-northd.8.xml
* Added notes to both northd.c and ovn-northd.8.xml that this relies on
  the ct_label.natted bit to carry its status from the ingress to egress
  pipelines.
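
To see the new flow on a live setup, something like the following can be
used (pass a specific router to lflow-list, or omit the argument to dump
all datapaths):

    ovn-sbctl lflow-list <router> | grep lr_out_chk_dnat_local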
---
 northd/northd.c         |  17 ++++++
 northd/ovn-northd.8.xml |  16 ++++++
 tests/ovn-northd.at     |   7 ++-
 tests/system-ovn.at     | 117 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/northd/northd.c b/northd/northd.c
index b264fb850..637cb8534 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -13388,6 +13388,23 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
         }
     }
 
+    if (od->nbr->n_nat) {
+        ds_clear(match);
+        ds_put_cstr(match, "ip && ct_label.natted == 1");
+        /* This flow is unique since it is in the egress pipeline but checks
+         * the value of ct_label.natted, which would have been set in the
+         * ingress pipeline. If a change is ever introduced that clears or
+         * otherwise invalidates the ct_label between the ingress and egress
+         * pipelines, then an alternative will need to be devised.
+         */
+        ds_clear(actions);
+        ds_put_cstr(actions, REGBIT_DST_NAT_IP_LOCAL" = 1; next;");
+        ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_CHECK_DNAT_LOCAL,
+                                50, ds_cstr(match), ds_cstr(actions),
+                                &od->nbr->header_);
+
+    }
+
     /* Handle force SNAT options set in the gateway router. */
     if (od->is_gw_router) {
         if (dnat_force_snat_ip) {
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 4784bff04..06ef74dcc 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -4310,6 +4310,22 @@ nd_ns {
       </li>
     </ul>
 
+    <p>
+      This table also installs a priority-50 logical flow for each logical
+      router that has NATs configured on it. The flow has match
+      <code>ip &amp;&amp; ct_label.natted == 1</code> and action
+      <code>REGBIT_DST_NAT_IP_LOCAL = 1; next;</code>. This is intended
+      to ensure that traffic that was DNATted locally will use a separate
+      conntrack zone for SNAT if SNAT is required later in the egress
+      pipeline. Note that this flow checks the value of
+      <code>ct_label.natted</code>, which is set in the ingress pipeline.
+      This means that ovn-northd assumes that this value is carried over
+      from the ingress pipeline to the egress pipeline and is not altered
+      or cleared. If conntrack label values are ever changed to be cleared
+      between the ingress and egress pipelines, then the match conditions
+      of this flow will need to be updated accordingly.
+    </p>
+
     <h3>Egress Table 1: UNDNAT</h3>
 
     <p>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 60d91a771..3b3866fa2 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -4983,6 +4983,7 @@ AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl
 
 AT_CHECK([grep "lr_out_chk_dnat_local" lr0flows | sed 's/table=./table=?/' | 
sort], [0], [dnl
   table=? (lr_out_chk_dnat_local), priority=0    , match=(1), 
action=(reg9[[4]] = 0; next;)
+  table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && 
ct_label.natted == 1), action=(reg9[[4]] = 1; next;)
   table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && ip4.dst == 
172.168.0.10 && is_chassis_resident("cr-lr0-public")), action=(reg9[[4]] = 1; 
next;)
   table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && ip4.dst == 
172.168.0.20 && is_chassis_resident("cr-lr0-public")), action=(reg9[[4]] = 1; 
next;)
   table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && ip4.dst == 
172.168.0.30 && is_chassis_resident("cr-lr0-public")), action=(reg9[[4]] = 1; 
next;)
@@ -5057,6 +5058,7 @@ AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl
 
 AT_CHECK([grep "lr_out_chk_dnat_local" lr0flows | sed 's/table=./table=?/' | 
sort], [0], [dnl
   table=? (lr_out_chk_dnat_local), priority=0    , match=(1), 
action=(reg9[[4]] = 0; next;)
+  table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && 
ct_label.natted == 1), action=(reg9[[4]] = 1; next;)
   table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && ip4.dst == 
172.168.0.10 && is_chassis_resident("cr-lr0-public")), action=(reg9[[4]] = 1; 
next;)
   table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && ip4.dst == 
172.168.0.20 && is_chassis_resident("cr-lr0-public")), action=(reg9[[4]] = 1; 
next;)
   table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && ip4.dst == 
172.168.0.30 && is_chassis_resident("cr-lr0-public")), action=(reg9[[4]] = 1; 
next;)
@@ -5125,6 +5127,7 @@ AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl
 
 AT_CHECK([grep "lr_out_chk_dnat_local" lr0flows | sed 's/table=./table=?/' | 
sort], [0], [dnl
   table=? (lr_out_chk_dnat_local), priority=0    , match=(1), 
action=(reg9[[4]] = 0; next;)
+  table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && 
ct_label.natted == 1), action=(reg9[[4]] = 1; next;)
 ])
 
 AT_CHECK([grep "lr_out_undnat" lr0flows | sed 's/table=./table=?/' | sort], 
[0], [dnl
@@ -5184,6 +5187,7 @@ AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl
 
 AT_CHECK([grep "lr_out_chk_dnat_local" lr0flows | sed 's/table=./table=?/' | 
sort], [0], [dnl
   table=? (lr_out_chk_dnat_local), priority=0    , match=(1), 
action=(reg9[[4]] = 0; next;)
+  table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && 
ct_label.natted == 1), action=(reg9[[4]] = 1; next;)
 ])
 
 AT_CHECK([grep "lr_out_undnat" lr0flows | sed 's/table=./table=?/' | sort], 
[0], [dnl
@@ -5249,6 +5253,7 @@ AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl
 
 AT_CHECK([grep "lr_out_chk_dnat_local" lr0flows | sed 's/table=./table=?/' | 
sort], [0], [dnl
   table=? (lr_out_chk_dnat_local), priority=0    , match=(1), 
action=(reg9[[4]] = 0; next;)
+  table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && 
ct_label.natted == 1), action=(reg9[[4]] = 1; next;)
 ])
 
 AT_CHECK([grep "lr_out_undnat" lr0flows | sed 's/table=./table=?/' | sort], 
[0], [dnl
@@ -5327,6 +5332,7 @@ AT_CHECK([grep "lr_in_dnat" lr0flows | sort], [0], [dnl
 
 AT_CHECK([grep "lr_out_chk_dnat_local" lr0flows | sed 's/table=./table=?/' | 
sort], [0], [dnl
   table=? (lr_out_chk_dnat_local), priority=0    , match=(1), 
action=(reg9[[4]] = 0; next;)
+  table=? (lr_out_chk_dnat_local), priority=50   , match=(ip && 
ct_label.natted == 1), action=(reg9[[4]] = 1; next;)
 ])
 
 AT_CHECK([grep "lr_out_undnat" lr0flows | sed 's/table=./table=?/' | sort], 
[0], [dnl
@@ -5975,7 +5981,6 @@ AT_CHECK([grep -e "(lr_in_ip_routing   ).*outport" lr0flows | sed 's/table=../ta
 ])
 
 AT_CLEANUP
-])
 
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([check exclude-lb-vips-from-garp option])
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index d4c22e7e9..0b29becc8 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -7975,3 +7975,120 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
 
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([SNAT in separate zone from DNAT])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# The goal of this test is to ensure that when traffic is first DNATted
+# (by way of a load balancer), and then SNATted, the SNAT happens in a
+# separate conntrack zone from the DNAT.
+
+start_daemon ovn-controller
+
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lr-add r1
+check ovn-nbctl lrp-add r1 r1_public 00:de:ad:ff:00:01 172.16.0.1/16
+check ovn-nbctl lrp-add r1 r1_s1 00:de:ad:fe:00:01 173.0.1.1/24
+check ovn-nbctl lrp-set-gateway-chassis r1_public hv1
+
+check ovn-nbctl lb-add r1_lb 30.0.0.1 172.16.0.102
+check ovn-nbctl lr-lb-add r1 r1_lb
+
+check ovn-nbctl ls-add s1
+check ovn-nbctl lsp-add s1 s1_r1
+check ovn-nbctl lsp-set-type s1_r1 router
+check ovn-nbctl lsp-set-addresses s1_r1 router
+check ovn-nbctl lsp-set-options s1_r1 router-port=r1_s1
+
+check ovn-nbctl lsp-add s1 vm1
+check ovn-nbctl lsp-set-addresses vm1 "00:de:ad:01:00:01 173.0.1.2"
+
+check ovn-nbctl lsp-add public public_r1
+check ovn-nbctl lsp-set-type public_r1 router
+check ovn-nbctl lsp-set-addresses public_r1 router
+check ovn-nbctl lsp-set-options public_r1 router-port=r1_public nat-addresses=router
+
+check ovn-nbctl lr-add r2
+check ovn-nbctl lrp-add r2 r2_public 00:de:ad:ff:00:02 172.16.0.2/16
+check ovn-nbctl lrp-add r2 r2_s2 00:de:ad:fe:00:02 173.0.2.1/24
+check ovn-nbctl lr-nat-add r2 dnat_and_snat 172.16.0.102 173.0.2.2
+check ovn-nbctl lrp-set-gateway-chassis r2_public hv1
+
+check ovn-nbctl ls-add s2
+check ovn-nbctl lsp-add s2 s2_r2
+check ovn-nbctl lsp-set-type s2_r2 router
+check ovn-nbctl lsp-set-addresses s2_r2 router
+check ovn-nbctl lsp-set-options s2_r2 router-port=r2_s2
+
+check ovn-nbctl lsp-add s2 vm2
+check ovn-nbctl lsp-set-addresses vm2 "00:de:ad:01:00:02 173.0.2.2"
+
+check ovn-nbctl lsp-add public public_r2
+check ovn-nbctl lsp-set-type public_r2 router
+check ovn-nbctl lsp-set-addresses public_r2 router
+check ovn-nbctl lsp-set-options public_r2 router-port=r2_public nat-addresses=router
+
+ADD_NAMESPACES(vm1)
+ADD_VETH(vm1, vm1, br-int, "173.0.1.2/24", "00:de:ad:01:00:01", \
+         "173.0.1.1")
+ADD_NAMESPACES(vm2)
+ADD_VETH(vm2, vm2, br-int, "173.0.2.2/24", "00:de:ad:01:00:02", \
+         "173.0.2.1")
+
+check ovn-nbctl lr-nat-add r1 dnat_and_snat 172.16.0.101 173.0.1.2 vm1 00:00:00:01:02:03
+check ovn-nbctl --wait=hv sync
+
+# Next, make sure that a ping works as expected
+NS_CHECK_EXEC([vm1], [ping -q -c 3 -i 0.3 -w 2 30.0.0.1 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Finally, make sure that conntrack shows two separate zones being used for
+# DNAT and SNAT
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(30.0.0.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=173.0.1.2,dst=30.0.0.1,id=<cleared>,type=8,code=0),reply=(src=172.16.0.102,dst=173.0.1.2,id=<cleared>,type=0,code=0),zone=<cleared>,labels=0x2
+])
+
+# The final two entries appear identical here only because FORMAT_CT scrubs
+# the zone numbers. In actuality, the zone numbers differ, which is why
+# conntrack reports two separate entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(172.16.0.102) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.0.101,dst=172.16.0.102,id=<cleared>,type=8,code=0),reply=(src=173.0.2.2,dst=172.16.0.101,id=<cleared>,type=0,code=0),zone=<cleared>
+icmp,orig=(src=173.0.1.2,dst=172.16.0.102,id=<cleared>,type=8,code=0),reply=(src=172.16.0.102,dst=172.16.0.101,id=<cleared>,type=0,code=0),zone=<cleared>
+icmp,orig=(src=173.0.1.2,dst=172.16.0.102,id=<cleared>,type=8,code=0),reply=(src=172.16.0.102,dst=172.16.0.101,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
-- 
2.31.1
