Hi Lorenzo,

I tried running the system test from this patch against ovn main and it passes even without the changes in northd. I think this is because the alice1 and foo1 ports need to be bound on different hvs to properly test the changes in northd.

See below for other comments.

On 5/24/23 13:56, Lorenzo Bianconi wrote:
In the current codebase for distributed gw router port use-case,
it is not possible to add a load balancer that redirects the traffic
to a back-end if it is used as internal IP of a FIP NAT rule since
the reply traffic is never centralized. Fix the issue centralizing the
traffic if it is the reply packet for the load balancer.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2023609
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
Changes since v2:
- rebase on top of ovn main branch
- fix spelling
Changes since v1:
- add new system-test and unit-test
---
  northd/northd.c         |  15 ++++++
  northd/ovn-northd.8.xml |  16 +++++++
  tests/ovn-northd.at     |  48 +++++++++++++++++++
  tests/system-ovn.at     | 100 ++++++++++++++++++++++++++++++++++++++++
  4 files changed, 179 insertions(+)

diff --git a/northd/northd.c b/northd/northd.c
index a6eca916b..6317c36fd 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10740,6 +10740,8 @@ build_distr_lrouter_nat_flows_for_lb(struct 
lrouter_nat_lb_flows_ctx *ctx,
                                       struct ovn_datapath *od)
  {
      struct ovn_port *dgp = od->l3dgw_ports[0];
+    struct ds gw_redir_match = DS_EMPTY_INITIALIZER;
+    struct ds gw_redir_action = DS_EMPTY_INITIALIZER;
const char *undnat_action; @@ -10784,6 +10786,17 @@ build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx,
          return;
      }
+ /* We need to centralize the LB traffic to properly perform
+     * the undnat stage.
+     */
+    ds_put_format(&gw_redir_match, "%s) && outport == %s",
+                  ds_cstr(ctx->undnat_match), dgp->json_key);
+    ds_put_format(&gw_redir_action, "outport = %s; next;",
+                  dgp->cr_port->json_key);
+    ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_IN_GW_REDIRECT,
+                            200, ds_cstr(&gw_redir_match),
+                            ds_cstr(&gw_redir_action), &ctx->lb->nlb->header_);
+
      ds_put_format(ctx->undnat_match, ") && (inport == %s || outport == %s)"
                    " && is_chassis_resident(%s)", dgp->json_key, dgp->json_key,
                    dgp->cr_port->json_key);
@@ -10791,6 +10804,8 @@ build_distr_lrouter_nat_flows_for_lb(struct 
lrouter_nat_lb_flows_ctx *ctx,
                              ds_cstr(ctx->undnat_match), undnat_action,
                              &ctx->lb->nlb->header_);
      ds_truncate(ctx->undnat_match, undnat_match_len);
+    ds_destroy(&gw_redir_match);
+    ds_destroy(&gw_redir_action);
  }
static void
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 540fe03bd..295f52b11 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -4578,6 +4578,22 @@ icmp6 {
      </p>
<ul>
+      <li>
+        For all the configured load balancing rules that include an IPv4
+        address <var>VIP</var>, and a list of IPv4 backend addresses
+        <var>B0</var>, <var>B1</var> .. <var>Bn</var> defined for the
+        <var>VIP</var> a priority-200 flow is added that matches <code>
+        ip4 &amp;&amp; (ip4.src == <var>B0</var> || ip4.src == <var>B1</var>
+        || ... || ip4.src == <var>Bn</var>)</code> with an action <code>
+        outport = <var>CR</var>; next;</code> where <var>CR</var> is the
+        <code>chassisredirect</code> port representing the instance of the
+        logical router distributed gateway port on the gateway chassis.
+        If the backend IPv4 address <var>Bx</var> is also configured with
+        L4 port <var>PORT</var> of protocol <var>P</var>, then the match
+        also includes <code>P.src</code> == <var>PORT</var>.
+        Similar flows are addeded for IPv6.

s/addeded/added/

+      </li>
+
        <li>
          For each NAT rule in the OVN Northbound database that can
          be handled in a distributed manner, a priority-100 logical
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index e3669bdf5..59f932c5f 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -9484,3 +9484,51 @@ AT_CHECK([ovn-sbctl lflow-list sw | grep ls_out_pre_lb | 
grep priority=110 | gre
AT_CLEANUP
  ])
+
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([check fip and lb flows])
+AT_KEYWORDS([fip-lb-flows])
+ovn_start
+
+ovn-nbctl lr-add R1
+ovn-nbctl lrp-add R1 R1-S0 02:ac:10:01:00:01 10.0.0.1/24 1000::a/64
+ovn-nbctl lrp-add R1 R1-PUB 02:ac:20:01:01:01 172.16.0.1/24 3000::a/64
+ovn-nbctl lrp-set-gateway-chassis R1-PUB hv1 20
+
+ovn-nbctl ls-add S0
+ovn-nbctl lsp-add S0 S0-R1
+ovn-nbctl lsp-set-type S0-R1 router
+ovn-nbctl lsp-set-addresses S0-R1 02:ac:10:01:00:01
+ovn-nbctl lsp-set-options S0-R1 router-port=R1-S0
+ovn-nbctl lsp-add S0 S0-P0
+ovn-nbctl lsp-set-addresses S0-P0 "50:54:00:00:00:03 10.0.0.3 1000::3"
+
+ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.0.110 10.0.0.3 S0-P0 
30:54:00:00:00:03
+ovn-nbctl lr-nat-add R1 dnat_and_snat 3000::c 1000::3 S0-P0 40:54:00:00:00:03

Nit: All ovn-nbctl commands should be replaced with `check ovn-nbctl` instead. This has two benefits:

1) If syntax breaks for any of the commands at some point, the test will fail as early as possible. 2) The check() function echoes the command you are executing, so the testsuite.log file will have more information in it about what all was run.

I won't bother pointing out all of the instances where this can be done. A basic rule to follow is that if you are writing a command that outputs nothing to stdout, it's a good idea to wrap it in the check() function.

+ovn-sbctl dump-flows R1 > R1flows
+AT_CAPTURE_FILE([R1flows])
+AT_CHECK([grep "lr_in_gw_redirect" R1flows | sort], [0], [dnl
+  table=20(lr_in_gw_redirect  ), priority=0    , match=(1), action=(next;)
+  table=20(lr_in_gw_redirect  ), priority=100  , match=(ip4.src == 10.0.0.3 && outport == 
"R1-PUB" && is_chassis_resident("S0-P0")), action=(eth.src = 30:54:00:00:00:03; reg1 = 
172.16.0.110; next;)
+  table=20(lr_in_gw_redirect  ), priority=100  , match=(ip6.src == 1000::3 && outport == 
"R1-PUB" && is_chassis_resident("S0-P0")), action=(eth.src = 40:54:00:00:00:03; xxreg1 
= 3000::c; next;)
+  table=20(lr_in_gw_redirect  ), priority=50   , match=(outport == "R1-PUB"), 
action=(outport = "cr-R1-PUB"; next;)
+])

As someone who just had to make a major change to the table numbers in northd, I'm now going to suggest that any checks of logical flows should have their table numbers anonymized. This allows for changes to be made to the tables without affecting northd tests.

Something like:

grep "lr_in_gw_redirect" R1flows | sort | sed s'table=../table=??'

will work here.

The same goes for the logical flow check that is performed lower down as well.

+
+ovn-nbctl lb-add lb_tcp4 172.16.0.100:50001 
10.0.0.2:50001,10.0.0.3:50001,10.0.0.4:50001 tcp
+ovn-nbctl lb-add lb_tcp6 [[1000::1]]:50001 [[1000::3]]:8080
+ovn-nbctl --wait=sb lr-lb-add R1 lb_tcp4
+ovn-nbctl --wait=sb lr-lb-add R1 lb_tcp6
+ovn-sbctl dump-flows R1 > R1flows
+AT_CAPTURE_FILE([R1flows])
+AT_CHECK([grep "lr_in_gw_redirect" R1flows | sort], [0], [dnl
+  table=20(lr_in_gw_redirect  ), priority=0    , match=(1), action=(next;)
+  table=20(lr_in_gw_redirect  ), priority=100  , match=(ip4.src == 10.0.0.3 && outport == 
"R1-PUB" && is_chassis_resident("S0-P0")), action=(eth.src = 30:54:00:00:00:03; reg1 = 
172.16.0.110; next;)
+  table=20(lr_in_gw_redirect  ), priority=100  , match=(ip6.src == 1000::3 && outport == 
"R1-PUB" && is_chassis_resident("S0-P0")), action=(eth.src = 40:54:00:00:00:03; xxreg1 
= 3000::c; next;)
+  table=20(lr_in_gw_redirect  ), priority=200  , match=(ip4 && ((ip4.src == 10.0.0.2 && tcp.src == 50001) || (ip4.src == 
10.0.0.3 && tcp.src == 50001) || (ip4.src == 10.0.0.4 && tcp.src == 50001)) && outport == "R1-PUB"), 
action=(outport = "cr-R1-PUB"; next;)
+  table=20(lr_in_gw_redirect  ), priority=200  , match=(ip6 && ((ip6.src == 1000::3 && tcp.src == 8080)) 
&& outport == "R1-PUB"), action=(outport = "cr-R1-PUB"; next;)
+  table=20(lr_in_gw_redirect  ), priority=50   , match=(outport == "R1-PUB"), 
action=(outport = "cr-R1-PUB"; next;)
+])
+
+AT_CLEANUP
+])
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index c2490008d..cd9bb7433 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -11546,3 +11546,103 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
patch-.*/d
AT_CLEANUP
  ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([DNAT_SNAT and LB traffic])
+AT_SKIP_IF([test $HAVE_NC = no])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+AT_KEYWORDS([dnat-snat-lb])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+ovn-nbctl lr-add R1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add alice
+
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lrp-add R1 alice 00:00:02:01:02:03 172.16.1.1/24 \
+    -- lrp-set-gateway-chassis alice hv1
+
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+    type=router options:router-port=foo \
+    -- lsp-set-addresses rp-foo router
+
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+    type=router options:router-port=alice \
+    -- lsp-set-addresses rp-alice router
+
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+         "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 192.168.1.2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "172.16.1.2/24", "f0:00:00:01:02:05", \
+         "172.16.1.1")
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:05 172.16.1.2"
+
+AT_CHECK([ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.1.2 foo1 
00:00:02:02:03:04])
+AT_CHECK([ovn-nbctl lr-nat-add R1 snat 172.16.1.1 192.168.1.0/24])

nit: It's simpler to use the check function instead of AT_CHECK here. As I stated in the ovn-northd test you added, it has the benefit of echoing the command to testsuite.log as well.

+
+OVS_START_L7([foo1], [http])
+
+NS_CHECK_EXEC([alice1], [tcpdump -l -c 6 -neei alice1 host 172.16.1.3 and tcp > 
dna_snat.pcap 2>dna_snat_err &])
+OVS_WAIT_UNTIL([grep "listening" dna_snat_err])
+
+ovn-nbctl --wait=hv sync
+
+NS_CHECK_EXEC([alice1], [nc 172.16.1.3 80 -z], [0], [ignore], [ignore])
+
+OVS_WAIT_UNTIL([
+    n_pkt=$(cat dna_snat.pcap | wc -l)
+    test "${n_pkt}" = "6"
+])

Why do we expect 6 packets?

+
+ovn-nbctl lb-add lb0 172.16.1.100:80 192.168.1.2:80 tcp
+ovn-nbctl --wait=hv lr-lb-add R1 lb0
+
+NS_CHECK_EXEC([alice1], [tcpdump -l -c 6 -neei alice1 host 172.16.1.100 and tcp > 
lb.pcap 2>lb_err &])

If I understand tcpdump properly, "host 172.16.1.100" will satisfy any packet that has src or dst address set to 172.16.1.100. The nc below sends traffic to 172.16.1.100, meaning that tcpdump will capture the request traffic. Since our goal is to ensure that reply traffic is reaching alice1, would it be better to use "dst net 172.16.1.2" as our filter?

+OVS_WAIT_UNTIL([grep "listening" lb_err])
+
+NS_CHECK_EXEC([alice1], [nc 172.16.1.100 80 -z], [0], [ignore], [ignore])
+
+OVS_WAIT_UNTIL([
+    n_pkt=$(cat lb.pcap | wc -l)
+    test "${n_pkt}" = "6"
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to