If BFD is down between two gateways in a chassis group, while both gateways
are alive, both ovn-controller were fighting to claim the router port.
There was a 500 msec, hardcoded, grace time preventing both controller
to run at 100% CPU.
Depending of sb dynamics, either gw1 or gw2 ended up claiming the port
most of the time.
For instance:
- gw1 claims the port at time T
- gw2 claims the port at time T (+ few micros)
- gw1 postpone the claims.
- gw1 claims the port at time T+500.
- gw2 claims the port at time T+500 (+ few micros)
- gw1 postpone the claims
However, computes still send their traffic towards the highest priority gateway.

This patch guarantees that, if the highest priority gateway is alive, it always 
claims
the port.
This patch does not change the behavior if the highest priority chassis "dies".

Note: computes send their traffic towards ha gateways based on BFD status and 
priority, i.e.
without ovn-controller and sb handling.
We do not want to base the traffic on which gateway claims the router port, as 
this would require
ovn-controller & sb handling, potentially delaying migration of the traffic.

Signed-off-by: Xavier Simonart <xsimo...@redhat.com>
Acked-by: Ales Musil <amu...@redhat.com>
Signed-off-by: Ales Musil <amu...@redhat.com>
(cherry picked from commit 9c2f897bcebb3d89afa223854ca07b76996bc870)

---
v2: Added missing cherry-pick and Acked-by/Signed-off-by from origin commit.
---
 controller/binding.c    | 24 +++++++++++++++++++-----
 controller/binding.h    |  4 +++-
 controller/ha-chassis.c |  2 +-
 controller/ha-chassis.h |  4 ++++
 controller/if-status.c  | 14 ++++++++++----
 5 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/controller/binding.c b/controller/binding.c
index 9419c70b1..1ad6a72cd 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -1338,16 +1338,30 @@ remove_additional_chassis(const struct 
sbrec_port_binding *pb,
 }
 
 bool
-lport_maybe_postpone(const char *port_name, long long int now,
+lport_maybe_postpone(const struct sbrec_port_binding *pb,
+                     const struct sbrec_chassis *chassis_rec,
+                     long long int now,
                      struct sset *postponed_ports)
 {
-    long long int last_claimed = get_claim_timestamp(port_name);
+    if (pb->ha_chassis_group) {
+        struct ha_chassis_ordered *ordered_ha_ch =
+            get_ordered_ha_chassis_list(pb->ha_chassis_group, NULL,
+                                        chassis_rec);
+        if (ordered_ha_ch &&
+            ordered_ha_ch->ha_ch[0].chassis == chassis_rec) {
+            ha_chassis_destroy_ordered(ordered_ha_ch);
+            return false;
+        } else {
+            ha_chassis_destroy_ordered(ordered_ha_ch);
+        }
+    }
+    long long int last_claimed = get_claim_timestamp(pb->logical_port);
     if (now - last_claimed >= CLAIM_TIME_THRESHOLD_MS) {
         return false;
     }
 
-    sset_add(postponed_ports, port_name);
-    VLOG_DBG("Postponed claim on logical port %s.", port_name);
+    sset_add(postponed_ports, pb->logical_port);
+    VLOG_DBG("Postponed claim on logical port %s.", pb->logical_port);
 
     return true;
 }
@@ -1378,7 +1392,7 @@ claim_lport(const struct sbrec_port_binding *pb,
         if (pb->chassis != chassis_rec) {
             long long int now = time_msec();
             if (pb->chassis) {
-                if (lport_maybe_postpone(pb->logical_port, now,
+                if (lport_maybe_postpone(pb, chassis_rec, now,
                                          postponed_ports)) {
                     return true;
                 }
diff --git a/controller/binding.h b/controller/binding.h
index f44f95833..6e4bb0093 100644
--- a/controller/binding.h
+++ b/controller/binding.h
@@ -276,7 +276,9 @@ void update_qos(struct ovsdb_idl_index * 
sbrec_port_binding_by_name,
                 const struct ovsrec_open_vswitch_table *ovs_table,
                 const struct ovsrec_bridge_table *bridge_table);
 
-bool lport_maybe_postpone(const char *port_name, long long int now,
+bool lport_maybe_postpone(const struct sbrec_port_binding *pb,
+                          const struct sbrec_chassis *chassis_rec,
+                          long long int now,
                           struct sset *postponed_ports);
 
 void claimed_lport_set_up(const struct sbrec_port_binding *pb,
diff --git a/controller/ha-chassis.c b/controller/ha-chassis.c
index 945c306b6..ad0b3ef0b 100644
--- a/controller/ha-chassis.c
+++ b/controller/ha-chassis.c
@@ -59,7 +59,7 @@ compare_chassis_prio_(const void *a_, const void *b_)
  * If active_tunnels is set is empty and local_chassis is HA3,
  * then it returns NULL.
  */
-static struct ha_chassis_ordered *
+struct ha_chassis_ordered *
 get_ordered_ha_chassis_list(const struct sbrec_ha_chassis_group *ha_ch_grp,
                             const struct sset *active_tunnels,
                             const struct sbrec_chassis *local_chassis)
diff --git a/controller/ha-chassis.h b/controller/ha-chassis.h
index 3768c2a5c..c7c91e001 100644
--- a/controller/ha-chassis.h
+++ b/controller/ha-chassis.h
@@ -46,5 +46,9 @@ struct ha_chassis_ordered *ha_chassis_get_ordered(
 
 void ha_chassis_destroy_ordered(
     struct ha_chassis_ordered *ordered_ha_ch);
+struct ha_chassis_ordered *get_ordered_ha_chassis_list(
+    const struct sbrec_ha_chassis_group *ha_ch_grp,
+    const struct sset *active_tunnels,
+    const struct sbrec_chassis *local_chassis);
 
 #endif /* OVN_HA_CHASSIS_H */
diff --git a/controller/if-status.c b/controller/if-status.c
index 32b6064ba..9b0f2bcdf 100644
--- a/controller/if-status.c
+++ b/controller/if-status.c
@@ -515,8 +515,11 @@ if_status_mgr_update(struct if_status_mgr *mgr,
                 chassis_rec)) {
                 if (!sb_readonly) {
                     long long int now = time_msec();
-                    if (lport_maybe_postpone(iface->id, now,
-                                             get_postponed_ports())) {
+                    const struct sbrec_port_binding *pb =
+                        sbrec_port_binding_table_get_for_uuid(pb_table,
+                                                              &iface->pb_uuid);
+                    if (pb && lport_maybe_postpone(pb, chassis_rec, now,
+                                                   get_postponed_ports())) {
                         continue;
                     }
                     local_binding_set_pb(bindings, iface->id, chassis_rec,
@@ -572,8 +575,11 @@ if_status_mgr_update(struct if_status_mgr *mgr,
             if (!local_bindings_pb_chassis_is_set(bindings, iface->id,
                 chassis_rec)) {
                 long long int now = time_msec();
-                if (lport_maybe_postpone(iface->id, now,
-                                         get_postponed_ports())) {
+                const struct sbrec_port_binding *pb =
+                    sbrec_port_binding_table_get_for_uuid(pb_table,
+                                                          &iface->pb_uuid);
+                if (pb && lport_maybe_postpone(pb, chassis_rec, now,
+                                               get_postponed_ports())) {
                     continue;
                 }
                 local_binding_set_pb(bindings, iface->id, chassis_rec,
-- 
2.47.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to