From: Numan Siddique <[email protected]>

When there are ECMP symmetric static routes configured, OVN selects
one of the next hops for the traffic originating from within the
cluster.  For the subsequent packets to the same destination,
OVN may select a different next hop (which is fine).  But there can
be certain use cases where the next hop entity is stateful and
selecting the same next hop is desirable.

This patch addresses this use case in the following way:

   1.  For the first packet originating from the OVN logical port
       VIF, OVN selects a next hop 'A' and forwards the traffic to
       it.

   2.  When the reply traffic is received (either from next hop 'A'
       or any other next hop), it commits the connection in the
       DNAT zone of the logical router and saves the state in
       ct_label.ecmp_reply_eth and ct_label.ecmp_reply_port.
       Note that we already support this for the traffic
       originating from an ECMP route [1].  We are now extending
       the same for the traffic originating from the cluster towards
       the ECMP route.

   3.  For the subsequent packets from the cluster, we select the
       next hop eth address and the port from the saved conntrack
       state.  This is straightforward, since we send the packet
       to the DNAT zone of the logical router anyway.

Example: If a logical router lr0 is configured with the below
ECMP static routes

ovn-nbctl lr-route-list lr0
IPv4 Routes
Route Table <main>:
                0.0.0.0/0                172.20.0.1 dst-ip ecmp
                0.0.0.0/0                172.20.0.2 dst-ip ecmp ecmp-symmetric-reply

Before this patch, we were adding the below logical flows in the router
pipeline for the ECMP route handling:

-------------
table=10(lr_in_ecmp_stateful), priority=100  , match=(inport == "lr0-public" && 
ip4.src == 0.0.0.0/0 && !ct.rpl && (ct.new || ct.est)), action=(ct_commit { 
ct_label.ecmp_reply_eth = eth.src;  ct_mark.ecmp_reply_port = 3;}; next;)
table=14(lr_in_ip_routing   ), priority=10300, match=(ct.rpl && 
ct_mark.ecmp_reply_port == 3 && reg7 == 0 && ip4.dst == 0.0.0.0/0), 
action=(ip.ttl--; flags.loopback = 1; eth.src = 00:11:22:00:ff:01; reg1 = 
172.20.0.100; outport = "lr0-public"; next;)
table=16(lr_in_policy       ), priority=65535, match=(ct.rpl && 
ct_mark.ecmp_reply_port == 3), action=(next;)
table=20(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && 
ct_mark.ecmp_reply_port == 3), action=(push(xxreg1); xxreg1 = ct_label; eth.dst 
= xxreg1[32..79]; pop(xxreg1); next;)
-------------

After this patch, we add the below logical flows:

--------------
table=10(lr_in_ecmp_stateful), priority=100  , match=(inport == "lr0-public" && 
ip4.src == 0.0.0.0/0 && (ct.new || ct.est)), action=(ct_commit { 
ct_label.ecmp_reply_eth = eth.src;  ct_mark.ecmp_reply_port = 3;}; next;)
table=14(lr_in_ip_routing   ), priority=10300, match=(ct_mark.ecmp_reply_port 
== 3 && reg7 == 0 && ip4.dst == 0.0.0.0/0), action=(ip.ttl--; flags.loopback = 
1; eth.src = 00:11:22:00:ff:01; reg1 = 172.20.0.100; outport = "lr0-public"; 
next;)
table=16(lr_in_policy       ), priority=65535, match=(ct_mark.ecmp_reply_port 
== 3), action=(next;)
table=20(lr_in_arp_resolve  ), priority=200  , match=(ct_mark.ecmp_reply_port 
== 3), action=(push(xxreg1); xxreg1 = ct_label; eth.dst = xxreg1[32..79]; 
pop(xxreg1); next;)
--------------

[1] - 4fdca656857d ("Add ECMP symmetric replies.")
Reported-at: https://issues.redhat.com/browse/FDP-628
Signed-off-by: Numan Siddique <[email protected]>
---
 northd/northd.c     |  4 ++--
 tests/ovn-northd.at | 26 +++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 6898daa00d..3227c093cb 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10706,7 +10706,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table 
*lflows,
      * NOTE: we purposely are not clearing match before this
      * ds_put_cstr() call. The previous contents are needed.
      */
-    ds_put_cstr(&match, " && !ct.rpl && (ct.new || ct.est)");
+    ds_put_cstr(&match, " && (ct.new || ct.est)");
     ds_put_format(&actions,
             "ct_commit { ct_label.ecmp_reply_eth = eth.src; "
             " %s = %" PRId64 ";}; "
@@ -10721,7 +10721,7 @@ add_ecmp_symmetric_reply_flows(struct lflow_table 
*lflows,
      * for where to route the packet.
      */
     ds_put_format(&ecmp_reply,
-                  "ct.rpl && %s == %"PRId64,
+                  "%s == %"PRId64,
                   ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
     ds_clear(&match);
     ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply),
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index a389d19886..98a5006cb5 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6722,8 +6722,14 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply 
lr-route-add lr0 1.0.0.1 192.16
 
 ovn-sbctl dump-flows lr0 > lr0flows
 
-AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |sort], [0], [dnl
+AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
+  table=??(lr_in_ip_routing   ), priority=0    , match=(1), action=(drop;)
+  table=??(lr_in_ip_routing   ), priority=10550, match=(nd_rs || nd_ra), 
action=(drop;)
+  table=??(lr_in_ip_routing   ), priority=194  , match=(inport == "lr0-public" 
&& ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 
ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; 
outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=74   , match=(ip4.dst == 
192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; 
flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=97   , match=(reg7 == 0 && ip4.dst 
== 1.0.0.1/32), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 
= 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; 
flags.loopback = 1; next;)
 ])
+
 AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], 
[dnl
   table=??(lr_in_ip_routing_ecmp), priority=0    , match=(1), action=(drop;)
   table=??(lr_in_ip_routing_ecmp), priority=150  , match=(reg8[[0..15]] == 0), 
action=(next;)
@@ -6732,7 +6738,12 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
ovn_strip_lflows], [0], [dn
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
192.168.0.20
 
 ovn-sbctl dump-flows lr0 > lr0flows
-AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], 
[0], [dnl
+AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
+  table=??(lr_in_ip_routing   ), priority=0    , match=(1), action=(drop;)
+  table=??(lr_in_ip_routing   ), priority=10300, 
match=(ct_label.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), 
action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 
192.168.0.1; outport = "lr0-public"; next;)
+  table=??(lr_in_ip_routing   ), priority=10550, match=(nd_rs || nd_ra), 
action=(drop;)
+  table=??(lr_in_ip_routing   ), priority=194  , match=(inport == "lr0-public" 
&& ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 
ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; 
outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=74   , match=(ip4.dst == 
192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; 
flags.loopback = 1; next;)
   table=??(lr_in_ip_routing   ), priority=97   , match=(reg7 == 0 && ip4.dst 
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; 
reg8[[16..31]] = select(1, 2);)
 ])
 AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 
's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl
@@ -6751,7 +6762,7 @@ AT_CHECK([grep -e "lr_in_defrag" lr0flows | 
ovn_strip_lflows], [0], [dnl
 dnl The chassis was created with other_config:ct-no-masked-label=false, the 
flows
 dnl should be using ct_label.ecmp_reply_port.
 AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], 
[dnl
-  table=??(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && 
ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; 
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
+  table=??(lr_in_arp_resolve  ), priority=200  , 
match=(ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; 
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
 ])
 
 dnl Simulate an ovn-controller upgrade to a version that supports
@@ -6761,14 +6772,19 @@ check ovn-sbctl set chassis ch1 
other_config:ct-no-masked-label=true
 check ovn-nbctl --wait=sb sync
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], 
[dnl
-  table=??(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && 
ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst 
= xxreg1[[32..79]]; pop(xxreg1); next;)
+  table=??(lr_in_arp_resolve  ), priority=200  , 
match=(ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; 
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
 ])
 
 # add ecmp route with wrong nexthop
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
192.168.1.20
 
 ovn-sbctl dump-flows lr0 > lr0flows
-AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], 
[0], [dnl
+AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
+  table=??(lr_in_ip_routing   ), priority=0    , match=(1), action=(drop;)
+  table=??(lr_in_ip_routing   ), priority=10300, 
match=(ct_mark.ecmp_reply_port == 1 && reg7 == 0 && ip4.dst == 1.0.0.1/32), 
action=(ip.ttl--; flags.loopback = 1; eth.src = 00:00:20:20:12:13; reg1 = 
192.168.0.1; outport = "lr0-public"; next;)
+  table=??(lr_in_ip_routing   ), priority=10550, match=(nd_rs || nd_ra), 
action=(drop;)
+  table=??(lr_in_ip_routing   ), priority=194  , match=(inport == "lr0-public" 
&& ip6.dst == fe80::/64), action=(ip.ttl--; reg8[[0..15]] = 0; xxreg0 = 
ip6.dst; xxreg1 = fe80::200:20ff:fe20:1213; eth.src = 00:00:20:20:12:13; 
outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=74   , match=(ip4.dst == 
192.168.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; 
flags.loopback = 1; next;)
   table=??(lr_in_ip_routing   ), priority=97   , match=(reg7 == 0 && ip4.dst 
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; 
reg8[[16..31]] = select(1, 2);)
 ])
 AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 
's/192\.168\.0\..0/192.168.0.??/' | ovn_strip_lflows], [0], [dnl
-- 
2.45.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to