Rely on the following new actions for ecmp-symmetric routing:
- chk_ecmp_nh_mac
- chk_ecmp_nh
- commit_ecmp_nh

In this way OVN is able to keep up if for any reason the ECMP traffic
source changes L2 address and keeps old L3 addresses.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2096233
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
 northd/northd.c         | 46 +++++++++++++++++++++++++++---------
 northd/ovn-northd.8.xml | 25 ++++++++++++--------
 tests/ovn-northd.at     |  4 ++--
 tests/ovn.at            |  4 ++--
 tests/system-ovn.at     | 52 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 106 insertions(+), 25 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index d31cb1688..f271b4882 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -227,6 +227,7 @@ enum ovn_stage {
 #define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[2]"
 #define REGBIT_LOOKUP_NEIGHBOR_IP_RESULT "reg9[3]"
 #define REGBIT_DST_NAT_IP_LOCAL "reg9[4]"
+#define REGBIT_KNOWN_ECMP_NH    "reg9[5]"
 
 /* Register to store the eth address associated to a router port for packets
  * received in S_ROUTER_IN_ADMISSION.
@@ -327,7 +328,8 @@ enum ovn_stage {
  * |     |   EGRESS_LOOPBACK/       | G |     UNUSED      |
  * | R9  |   PKT_LARGER/            | 4 |                 |
  * |     |   LOOKUP_NEIGHBOR_RESULT/|   |                 |
- * |     |   SKIP_LOOKUP_NEIGHBOR}  |   |                 |
+ * |     |   SKIP_LOOKUP_NEIGHBOR/  |   |                 |
+ * |     |   KNOWN_ECMP_NH}         |   |                 |
  * |     |                          |   |                 |
  * |     | REG_ORIG_TP_DPORT_ROUTER |   |                 |
  * |     |                          |   |                 |
@@ -9437,6 +9439,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
                                struct ds *route_match)
 {
     const struct nbrec_logical_router_static_route *st_route = route->route;
+    struct ds base_match = DS_EMPTY_INITIALIZER;
     struct ds match = DS_EMPTY_INITIALIZER;
     struct ds actions = DS_EMPTY_INITIALIZER;
     struct ds ecmp_reply = DS_EMPTY_INITIALIZER;
@@ -9448,20 +9451,22 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
     /* If symmetric ECMP replies are enabled, then packets that arrive over
      * an ECMP route need to go through conntrack.
      */
-    ds_put_format(&match, "inport == %s && ip%s.%s == %s",
+    ds_put_format(&base_match, "inport == %s && ip%s.%s == %s",
                   out_port->json_key,
                   IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "4" : "6",
                   route->is_src_route ? "dst" : "src",
                   cidr);
     free(cidr);
     ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100,
-                            ds_cstr(&match), "ct_next;",
-                            &st_route->header_);
+            ds_cstr(&base_match),
+            REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh_mac(); ct_next;",
+            &st_route->header_);
 
     /* And packets that go out over an ECMP route need conntrack */
     ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100,
-                            ds_cstr(route_match), "ct_next;",
-                            &st_route->header_);
+            ds_cstr(route_match),
+            REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh(); ct_next;",
+            &st_route->header_);
 
     /* Save src eth and inport in ct_label for packets that arrive over
      * an ECMP route.
@@ -9469,11 +9474,28 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
      * NOTE: we purposely are not clearing match before this
      * ds_put_cstr() call. The previous contents are needed.
      */
-    ds_put_cstr(&match, " && (ct.new && !ct.est)");
+    ds_put_format(&match, "%s && (ct.new && !ct.est)", ds_cstr(&base_match));
+    ds_put_format(&actions,
+            "ct_commit { ct_label.ecmp_reply_eth = eth.src; "
+            " %s = %" PRId64 ";}; "
+            "commit_ecmp_nh(ipv6 = %s;); next;",
+            ct_ecmp_reply_port_match, out_port->sb->tunnel_key,
+            IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "false" : "true");
+    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
+                            ds_cstr(&match), ds_cstr(&actions),
+                            &st_route->header_);
 
-    ds_put_format(&actions, "ct_commit { ct_label.ecmp_reply_eth = eth.src;"
-                  " %s = %" PRId64 ";}; next;",
-                  ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
+    ds_clear(&match);
+    ds_put_format(&match,
+                  "%s && (!ct.rpl && ct.est) && "REGBIT_KNOWN_ECMP_NH" == 0",
+                  ds_cstr(&base_match));
+    ds_clear(&actions);
+    ds_put_format(&actions,
+            "ct_commit { ct_label.ecmp_reply_eth = eth.src; "
+            " %s = %" PRId64 ";}; "
+            "commit_ecmp_nh(ipv6 = %s;); next;",
+            ct_ecmp_reply_port_match, out_port->sb->tunnel_key,
+            IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "false" : "true");
     ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 100,
                             ds_cstr(&match), ds_cstr(&actions),
                             &st_route->header_);
@@ -9481,7 +9503,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
     /* Bypass ECMP selection if we already have ct_label information
      * for where to route the packet.
      */
-    ds_put_format(&ecmp_reply, "ct.rpl && %s == %"PRId64,
+    ds_put_format(&ecmp_reply,
+                  "ct.rpl && "REGBIT_KNOWN_ECMP_NH" == 1 && %s == %"PRId64,
                   ct_ecmp_reply_port_match, out_port->sb->tunnel_key);
     ds_clear(&match);
     ds_put_format(&match, "%s && %s", ds_cstr(&ecmp_reply),
@@ -9517,6 +9540,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
                             200, ds_cstr(&ecmp_reply),
                             action, &st_route->header_);
 
+    ds_destroy(&base_match);
     ds_destroy(&match);
     ds_destroy(&actions);
     ds_destroy(&ecmp_reply);
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index a87df24bd..aa7196a92 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -3166,8 +3166,12 @@ icmp6 {
       configured. The matching logic for these ports essentially reverses the
       configured logic of the ECMP route. So for instance, a route with a
       destination routing policy will instead match if the source IP address
-      matches the static route's prefix. The flow uses the action
-      <code>ct_next</code> to send IP packets to the connection tracker for
+      matches the static route's prefix. The flow uses the actions
+      <code>chk_ecmp_nh_mac(); ct_next</code> or
+      <code>chk_ecmp_nh(); ct_next</code> to send IP packets to table
+      <code>OFTABLE_ECMP_NH_MAC</code> or to table
+      <code>OFTABLE_ECMP_NH</code> in order to check if source info are already
+      stored by OVN and then to the connection tracker for
       packet de-fragmentation and tracking before sending it to the next table.
     </p>
 
@@ -3426,10 +3430,11 @@ icmp6 {
         route with a destination routing policy will instead match if the
         source IP address matches the static route's prefix. The flow uses
         the action <code>ct_commit { ct_label.ecmp_reply_eth = eth.src;"
-        " ct_mark.ecmp_reply_port = <var>K</var>;}; next; </code> to commit
-        the connection and storing <code>eth.src</code> and the ECMP
-        reply port binding tunnel key <var>K</var> in the
-        <code>ct_label</code>.
+        " ct_mark.ecmp_reply_port = <var>K</var>;}; commit_ecmp_nh(); next;
+        </code> to commit the connection and storing <code>eth.src</code> and
+        the ECMP reply port binding tunnel key <var>K</var> in the
+        <code>ct_label</code> and the traffic pattern to table
+        <code>OFTABLE_ECMP_NH_MAC</code> or <code>OFTABLE_ECMP_NH</code>.
       </li>
     </ul>
 
@@ -3568,10 +3573,10 @@ output;
       which the packet should be sent. The <code>ct_mark.ecmp_reply_port</code>
       tells the logical router port on which the packet should be sent. These
       values saved to the conntrack fields when the initial ingress traffic is
-      received over the ECMP route and committed to conntrack. The
-      priority-10300 flows in this stage set the <code>outport</code>,
-      while the <code>eth.dst</code> is set by flows at the ARP/ND Resolution
-      stage.
+      received over the ECMP route and committed to conntrack.
+      If <code>REGBIT_KNOWN_ECMP_NH</code> is set, the priority-10300
+      flows in this stage set the <code>outport</code>, while the
+      <code>eth.dst</code> is set by flows at the ARP/ND Resolution stage.
     </p>
 
     <p>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 610a5ce12..c153a338c 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -5647,7 +5647,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 
's/192\.168\.0\..0/192.
 dnl The chassis was created with other_config:ct-no-masked-label=false, the 
flows
 dnl should be using ct_label.ecmp_reply_port.
 AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | sed 
's/table=../table=??/'], [0], [dnl
-  table=??(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && 
ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; 
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
+  table=??(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && reg9[[5]] == 
1 && ct_label.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; 
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
 ])
 
 dnl Simulate an ovn-controller upgrade to a version that supports
@@ -5657,7 +5657,7 @@ check ovn-sbctl set chassis ch1 
other_config:ct-no-masked-label=true
 check ovn-nbctl --wait=sb sync
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | sed 
's/table=../table=??/'], [0], [dnl
-  table=??(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && 
ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; eth.dst 
= xxreg1[[32..79]]; pop(xxreg1); next;)
+  table=??(lr_in_arp_resolve  ), priority=200  , match=(ct.rpl && reg9[[5]] == 
1 && ct_mark.ecmp_reply_port == 1), action=(push(xxreg1); xxreg1 = ct_label; 
eth.dst = xxreg1[[32..79]]; pop(xxreg1); next;)
 ])
 
 # add ecmp route with wrong nexthop
diff --git a/tests/ovn.at b/tests/ovn.at
index 6e35bb5b8..f180b1814 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -27113,7 +27113,7 @@ AT_CHECK([
         grep "priority=200" | \
         grep -c 
"move:NXM_NX_CT_LABEL\\[[\\]]->NXM_NX_XXREG1\\[[\\]],move:NXM_NX_XXREG1\\[[32..79\\]]->NXM_OF_ETH_DST"
     done; :], [0], [dnl
-1
+2
 1
 0
 0
@@ -27238,7 +27238,7 @@ AT_CHECK([
         grep "priority=200" | \
         grep -c 
"move:NXM_NX_CT_LABEL\\[[\\]]->NXM_NX_XXREG1\\[[\\]],move:NXM_NX_XXREG1\\[[32..79\\]]->NXM_OF_ETH_DST"
     done; :], [0], [dnl
-1
+2
 1
 0
 0
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 1cabf1f31..7c7063a38 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -5981,6 +5981,32 @@ AT_CHECK([ovs-appctl dpctl/dump-flows | grep 
'ct_state(-new+est+rpl+trk).*ct_lab
 1
 ])
 
+# Flush conntrack entries for easier output parsing of next test.
+AT_CHECK([ovs-appctl dpctl/flush-conntrack])
+
+# Change bob1 L2 address anche check the reply is properly updated.
+ovn-nbctl set Logical_Router_Port R2_ext mac='"00:00:10:01:02:04"'
+ovn-nbctl set Logical_Switch_Port r2-ext \
+     type=router options:router-port=R2_ext addresses='"00:00:10:01:02:04"'
+
+NS_CHECK_EXEC([bob1], [ping -q -c 20 -i 0.3 -w 15 10.0.0.2 | FORMAT_PING], \
+[0], [dnl
+20 packets transmitted, 20 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep 
'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
+1
+])
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep 
'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
+1
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | 
FORMAT_CT(172.16.0.1) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
+icmp,orig=(src=172.16.0.1,dst=10.0.0.2,id=<cleared>,type=8,code=0),reply=(src=10.0.0.2,dst=172.16.0.1,id=<cleared>,type=0,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
+])
+
 ovs-ofctl dump-flows br-int
 
 OVS_APP_EXIT_AND_WAIT([ovn-controller])
@@ -6148,6 +6174,32 @@ sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
 
icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x401020400000000
 ])
 
+# Flush conntrack entries for easier output parsing of next test.
+AT_CHECK([ovs-appctl dpctl/flush-conntrack])
+
+# Change bob1 L2 address anche check the reply is properly updated.
+ovn-nbctl set Logical_Router_Port R2_ext mac='"00:00:10:01:02:04"'
+ovn-nbctl set Logical_Switch_Port r2-ext \
+     type=router options:router-port=R2_ext addresses='"00:00:10:01:02:04"'
+
+NS_CHECK_EXEC([bob1], [ping -q -c 20 -i 0.3 -w 15 fd01::2 | FORMAT_PING], \
+[0], [dnl
+20 packets transmitted, 20 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep 
'ct_state(+new-est-rpl+trk).*ct(.*label=0x1001020400000000/.*)' -c], [0], [dnl
+1
+])
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep 
'ct_state(-new+est+rpl+trk).*ct_label(0x1001020400000000)' -c], [0], [dnl
+1
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 0x1001020400000000 | 
FORMAT_CT(fd01::2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/' |
+sed -e 's/mark=[[0-9]]*/mark=<cleared>/'], [0], [dnl
+icmpv6,orig=(src=fd07::1,dst=fd01::2,id=<cleared>,type=128,code=0),reply=(src=fd01::2,dst=fd07::1,id=<cleared>,type=129,code=0),zone=<cleared>,mark=<cleared>,labels=0x1001020400000000
+])
+
 ovs-ofctl dump-flows br-int
 
 OVS_APP_EXIT_AND_WAIT([ovn-controller])
-- 
2.36.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to