The mechanism for refreshing mac binding entries will send out an ARP/ND
request using the logical port's mac address, regardless of whether the
port is local to a given chassis.

For gateway ports this will make a given LRP incorrectly appear to be
claimed on the chassis that sends out the request, thereby confusing
physical switches and other gateway chassis, which ultimately results in
broken connectivity for that LRP.
In particular, when the chassis that has actually claimed the LRP, sees
the request from the other chassis with the mac address from its LRP,
the OVS bridge that provides the external connectivity will incorrectly
relearn the LRP's mac address on the port that is connected to the
physical network which causes it to drop all further traffic destined
to that LRP until egress traffic coming from the LRP causes it to
move the mac back to the correct port or the FDB entry times out.

Add a check to verify that a logical port from a mac binding is local to
a given chassis before trying to refresh it.

Fixes: 1e4d4409f391 ("controller: Send ARP/ND for stale mac_bindings entries.")
Signed-off-by: Felix Moebius <[email protected]>
---
 controller/mac-cache.c      | 13 ++++++++++++-
 controller/mac-cache.h      |  3 +++
 controller/ovn-controller.c |  2 +-
 controller/statctrl.c       |  7 ++++---
 controller/statctrl.h       |  1 +
 5 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/controller/mac-cache.c b/controller/mac-cache.c
index 4f859b7ea..0c8e5260b 100644
--- a/controller/mac-cache.c
+++ b/controller/mac-cache.c
@@ -402,6 +402,7 @@ void
 mac_binding_stats_run(
         struct rconn *swconn OVS_UNUSED,
         struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
+        const struct sbrec_chassis *chassis OVS_UNUSED,
         struct vector *stats_vec, uint64_t *req_delay, void *data)
 {
     struct mac_cache_data *cache_data = data;
@@ -445,7 +446,7 @@ mac_binding_stats_run(
 
     mac_cache_update_req_delay(&cache_data->thresholds, req_delay);
     if (*req_delay) {
-        VLOG_DBG("MAC binding statistics dalay: %"PRIu64, *req_delay);
+        VLOG_DBG("MAC binding statistics delay: %"PRIu64, *req_delay);
     }
 }
 
@@ -500,6 +501,7 @@ fdb_update_log(const char *action,
 void
 fdb_stats_run(struct rconn *swconn OVS_UNUSED,
               struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
+              const struct sbrec_chassis *chassis OVS_UNUSED,
               struct vector *stats_vec,
               uint64_t *req_delay, void *data)
 {
@@ -871,6 +873,7 @@ void
 mac_binding_probe_stats_run(
         struct rconn *swconn,
         struct ovsdb_idl_index *sbrec_port_binding_by_name,
+        const struct sbrec_chassis *chassis,
         struct vector *stats_vec,
         uint64_t *req_delay, void *data)
 {
@@ -892,6 +895,14 @@ mac_binding_probe_stats_run(
         uint64_t since_updated_ms = timewall_now - mb->sbrec->timestamp;
         const struct sbrec_mac_binding *sbrec = mb->sbrec;
 
+        if (!lport_is_local(sbrec_port_binding_by_name, chassis,
+                            sbrec->logical_port)) {
+            mac_binding_update_log("Not sending ARP/ND request for non-local",
+                                   &mb->data, true, threshold,
+                                   stats->idle_age_ms, since_updated_ms);
+            continue;
+        }
+
         if (stats->idle_age_ms > threshold->value) {
             mac_binding_update_log("Not sending ARP/ND request for non-active",
                                    &mb->data, true, threshold,
diff --git a/controller/mac-cache.h b/controller/mac-cache.h
index d7b5b9cd5..d05d964e6 100644
--- a/controller/mac-cache.h
+++ b/controller/mac-cache.h
@@ -194,6 +194,7 @@ mac_binding_stats_process_flow_stats(struct vector 
*stats_vec,
 void mac_binding_stats_run(
         struct rconn *swconn OVS_UNUSED,
         struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
+        const struct sbrec_chassis *chassis OVS_UNUSED,
         struct vector *stats_vec, uint64_t *req_delay, void *data);
 
 /* FDB stat processing. */
@@ -203,6 +204,7 @@ void fdb_stats_process_flow_stats(struct vector *stats_vec,
 void fdb_stats_run(
         struct rconn *swconn OVS_UNUSED,
         struct ovsdb_idl_index *sbrec_port_binding_by_name OVS_UNUSED,
+        const struct sbrec_chassis *chassis OVS_UNUSED,
         struct vector *stats_vec, uint64_t *req_delay, void *data);
 
 /* Packet buffering. */
@@ -238,6 +240,7 @@ void mac_binding_probe_stats_process_flow_stats(
 void mac_binding_probe_stats_run(
         struct rconn *swconn,
         struct ovsdb_idl_index *sbrec_port_binding_by_name,
+        const struct sbrec_chassis *chassis,
         struct vector *stats_vec, uint64_t *req_delay, void *data);
 
 #endif /* controller/mac-cache.h */
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 4353f6094..f57618273 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -7843,7 +7843,7 @@ main(int argc, char *argv[])
                     mac_cache_data = engine_get_data(&en_mac_cache);
                     if (mac_cache_data) {
                         statctrl_run(ovnsb_idl_txn, sbrec_port_binding_by_name,
-                                     mac_cache_data);
+                                     chassis, mac_cache_data);
                     }
 
                     ofctrl_seqno_update_create(
diff --git a/controller/statctrl.c b/controller/statctrl.c
index a76bac056..083696d46 100644
--- a/controller/statctrl.c
+++ b/controller/statctrl.c
@@ -66,6 +66,7 @@ struct stats_node {
      * This function runs in main thread locked behind mutex. */
     void (*run)(struct rconn *swconn,
                 struct ovsdb_idl_index *sbrec_port_binding_by_name,
+                const struct sbrec_chassis *chassis,
                 struct vector *stats,
                 uint64_t *req_delay, void *data);
     /* Name of the stats node. */
@@ -175,6 +176,7 @@ statctrl_init(void)
 void
 statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
              struct ovsdb_idl_index *sbrec_port_binding_by_name,
+             const struct sbrec_chassis *chassis,
              struct mac_cache_data *mac_cache_data)
 {
     if (!ovnsb_idl_txn) {
@@ -197,9 +199,8 @@ statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
         uint64_t prev_delay = node->request_delay;
 
         stopwatch_start(node->name, time_msec());
-        node->run(statctrl_ctx.swconn,
-                  sbrec_port_binding_by_name, &node->stats,
-                  &node->request_delay, node_data[i]);
+        node->run(statctrl_ctx.swconn, sbrec_port_binding_by_name, chassis,
+                  &node->stats, &node->request_delay, node_data[i]);
         vector_clear(&node->stats);
         if (vector_capacity(&node->stats) >= STATS_VEC_CAPACITY_THRESHOLD) {
             VLOG_DBG("The statistics vector for node '%s' capacity "
diff --git a/controller/statctrl.h b/controller/statctrl.h
index 69a0b6f35..3e56b4a54 100644
--- a/controller/statctrl.h
+++ b/controller/statctrl.h
@@ -21,6 +21,7 @@
 void statctrl_init(void);
 void statctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                   struct ovsdb_idl_index *sbrec_port_binding_by_name,
+                  const struct sbrec_chassis *chassis,
                   struct mac_cache_data *mac_cache_data);
 
 void statctrl_update_swconn(const char *target, int probe_interval);
-- 
2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to