If you set a chassis' encap ip to be the same as that of another
chassis, OVN should detect it, log an error, and delete the chassis
record.

But this was not the case; OVN would allow the transaction. However, the
commit to the southbound database would fail, because there was already
an encap record with that same ip and type (from the other chassis).
The OVS db on the problematic chassis would then rapidly increase in
size. Essentially, the controller would get stuck in a loop of deleting
and recreating the tunnels.

This change makes it so when encaps are change, before building encaps,
the controller detects if there is already an existing encap tunnel of
the same ip and type. If so, it logs an error, refuses to build the
encaps, and deletes the associated chassis and chassis private record.

Reported-at: https://issues.redhat.com/browse/FDP-1481
Co-authored-by: Nicholas Hubbard <[email protected]>
Signed-off-by: Nicholas Hubbard <[email protected]>
Signed-off-by: Rosemarie O'Riorden <[email protected]>
---
v3:
- Update misleading comment
- Add deletion of chassis private record
- Move declaration function name to same line as return type
- Delete "Transaction causes multiple rows" error from exceptions of affected
  test
- Update my own error to be more specific in exceptions of affected test
- Update test to not be racy; Check for db row counts instead of saving file
  size after change
- Update commit message to reflect changes

v2: Fix typo in title.
---
 controller/chassis.c        | 99 ++++++++++++++++++++++++++-----------
 controller/chassis.h        |  3 +-
 controller/ovn-controller.c |  5 +-
 lib/chassis-index.c         | 20 ++++++++
 lib/chassis-index.h         |  3 ++
 tests/ovn-controller.at     | 36 ++++++++++++--
 6 files changed, 130 insertions(+), 36 deletions(-)

diff --git a/controller/chassis.c b/controller/chassis.c
index 07bef1c56..12117259d 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -77,6 +77,12 @@ struct ovs_chassis_cfg {
     bool ct_label_flush;
 };
 
+enum chassis_update_status {
+    CHASSIS_UPDATED,
+    CHASSIS_NOT_UPDATED,
+    CHASSIS_NEED_DELETE
+};
+
 static void
 ovs_chassis_cfg_init(struct ovs_chassis_cfg *cfg)
 {
@@ -699,7 +705,8 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
                      const char *encap_ip_default,
                      const char *chassis_id,
                      const char *encap_csum,
-                     size_t *n_encap)
+                     size_t *n_encap,
+                     struct ovsdb_idl_index *sbrec_encaps)
 {
     size_t tunnel_count = 0;
 
@@ -713,6 +720,18 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
 
     SSET_FOR_EACH (encap_ip, encap_ip_set) {
         SSET_FOR_EACH (encap_type, encap_type_set) {
+            const struct sbrec_encap *encap_rec =
+                encap_lookup_by_ip_and_type(sbrec_encaps,
+                                            encap_ip, encap_type);
+            if (encap_rec && strcmp(encap_rec->chassis_name, chassis_id)) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "'%s' already has encap ip '%s' and "
+                             "type '%s', cannot duplicate on '%s'",
+                             encap_rec->chassis_name, encap_rec->ip,
+                             encap_rec->type, chassis_id);
+                continue;
+            }
+
             struct sbrec_encap *encap = sbrec_encap_insert(ovnsb_idl_txn);
 
             sbrec_encap_set_type(encap, encap_type);
@@ -778,7 +797,7 @@ update_supported_sset(struct sset *supported)
 
 static void
 remove_unsupported_options(const struct sbrec_chassis *chassis_rec,
-                           bool *updated)
+                           enum chassis_update_status *update_status)
 {
     struct sset supported_options = SSET_INITIALIZER(&supported_options);
     update_supported_sset(&supported_options);
@@ -789,7 +808,7 @@ remove_unsupported_options(const struct sbrec_chassis 
*chassis_rec,
             VLOG_WARN("Removing unsupported key \"%s\" from chassis record.",
                       node->key);
             sbrec_chassis_update_other_config_delkey(chassis_rec, node->key);
-            *updated = true;
+            *update_status = CHASSIS_UPDATED;
         }
     }
 
@@ -827,25 +846,28 @@ chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
 }
 
 /* Update a Chassis record based on the config in the ovs config.
- * Returns true if 'chassis_rec' was updated, false otherwise.
+ * Returns CHASSIS_UPDATED if 'chassis_rec' was updated, CHASSIS_NEED_DELETE if
+ * another chassis exists with an encap with same IP and type as 'chassis', and
+ * returns CHASSIS_NOT_UPDATED otherwise.
  */
-static bool
+static enum chassis_update_status
 chassis_update(const struct sbrec_chassis *chassis_rec,
                struct ovsdb_idl_txn *ovnsb_idl_txn,
                const struct ovs_chassis_cfg *ovs_cfg,
                const char *chassis_id,
-               const struct sset *transport_zones)
+               const struct sset *transport_zones,
+               struct ovsdb_idl_index *sbrec_encaps)
 {
-    bool updated = false;
+    enum chassis_update_status update_status = CHASSIS_NOT_UPDATED;
 
     if (strcmp(chassis_id, chassis_rec->name)) {
         sbrec_chassis_set_name(chassis_rec, chassis_id);
-        updated = true;
+        update_status = CHASSIS_UPDATED;
     }
 
     if (strcmp(ovs_cfg->hostname, chassis_rec->hostname)) {
         sbrec_chassis_set_hostname(chassis_rec, ovs_cfg->hostname);
-        updated = true;
+        update_status = CHASSIS_UPDATED;
     }
 
     if (chassis_other_config_changed(ovs_cfg, chassis_rec)) {
@@ -856,12 +878,12 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
         sbrec_chassis_verify_other_config(chassis_rec);
         sbrec_chassis_set_other_config(chassis_rec, &other_config);
         smap_destroy(&other_config);
-        updated = true;
+        update_status = CHASSIS_UPDATED;
     }
 
     update_chassis_transport_zones(transport_zones, chassis_rec);
 
-    remove_unsupported_options(chassis_rec, &updated);
+    remove_unsupported_options(chassis_rec, &update_status);
 
     /* If any of the encaps should change, update them. */
     bool tunnels_changed =
@@ -871,20 +893,27 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
                                 ovs_cfg->encap_csum,
                                 chassis_rec);
     if (!tunnels_changed) {
-        return updated;
+        return update_status;
     }
 
     struct sbrec_encap **encaps;
     size_t n_encap;
 
     encaps =
-        chassis_build_encaps(ovnsb_idl_txn, &ovs_cfg->encap_type_set,
+        chassis_build_encaps(ovnsb_idl_txn,
+                             &ovs_cfg->encap_type_set,
                              &ovs_cfg->encap_ip_set,
                              ovs_cfg->encap_ip_default, chassis_id,
-                             ovs_cfg->encap_csum, &n_encap);
+                             ovs_cfg->encap_csum, &n_encap,
+                             sbrec_encaps);
     sbrec_chassis_set_encaps(chassis_rec, encaps, n_encap);
     free(encaps);
-    return true;
+
+    if (!n_encap) {
+        return CHASSIS_NEED_DELETE;
+    }
+
+    return CHASSIS_UPDATED;
 }
 
 /* If this is a chassis_private config update after we initialized the record
@@ -935,7 +964,8 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
             const char *chassis_id,
             const struct ovsrec_bridge *br_int,
             const struct sset *transport_zones,
-            const struct sbrec_chassis_private **chassis_private)
+            const struct sbrec_chassis_private **chassis_private,
+            struct ovsdb_idl_index *sbrec_encaps)
 {
     struct ovs_chassis_cfg ovs_cfg;
 
@@ -956,22 +986,31 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
      * modified in the ovs table.
      */
     if (chassis_rec && ovnsb_idl_txn) {
-        bool updated = chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg,
-                                      chassis_id, transport_zones);
-
-        if (!existed || updated) {
-            ovsdb_idl_txn_add_comment(ovnsb_idl_txn,
-                                      "ovn-controller: %s chassis '%s'",
-                                      !existed ? "registering" : "updating",
-                                      chassis_id);
-        }
+        enum chassis_update_status update_status =
+            chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg,
+                           chassis_id,
+                           transport_zones, sbrec_encaps);
+
+        *chassis_private = chassis_private_get_record(ovnsb_idl_txn,
+                               sbrec_chassis_private_by_name, chassis_id);
 
-        *chassis_private =
-            chassis_private_get_record(ovnsb_idl_txn,
-                                       sbrec_chassis_private_by_name,
+        if (update_status == CHASSIS_NEED_DELETE) {
+            sbrec_chassis_delete(chassis_rec);
+            chassis_rec = NULL;
+            if (*chassis_private) {
+                sbrec_chassis_private_delete(*chassis_private);
+            }
+        } else {
+            if (!existed || update_status == CHASSIS_UPDATED) {
+                ovsdb_idl_txn_add_comment(ovnsb_idl_txn,
+                                          "ovn-controller: %s chassis '%s'",
+                                          !existed ? "registering"
+                                          : "updating", chassis_id);
+            }
+            if (*chassis_private) {
+                chassis_private_update(*chassis_private, chassis_rec,
                                        chassis_id);
-        if (*chassis_private) {
-            chassis_private_update(*chassis_private, chassis_rec, chassis_id);
+            }
         }
     }
 
diff --git a/controller/chassis.h b/controller/chassis.h
index 03cc2f906..14251bdad 100644
--- a/controller/chassis.h
+++ b/controller/chassis.h
@@ -44,7 +44,8 @@ const struct sbrec_chassis *chassis_run(
     const struct ovsrec_open_vswitch_table *,
     const char *chassis_id, const struct ovsrec_bridge *br_int,
     const struct sset *transport_zones,
-    const struct sbrec_chassis_private **chassis_private);
+    const struct sbrec_chassis_private **chassis_private,
+    struct ovsdb_idl_index *sbrec_chassis_encaps);
 bool chassis_cleanup(struct ovsdb_idl_txn *ovs_idl_txn,
                      struct ovsdb_idl_txn *ovnsb_idl_txn,
                      const struct ovsrec_open_vswitch_table *,
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 6fbf3a227..67a2c447c 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -6684,6 +6684,9 @@ main(int argc, char *argv[])
     struct ovsdb_idl_index *sbrec_learned_route_index_by_datapath
         = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
                                   &sbrec_learned_route_col_datapath);
+    struct ovsdb_idl_index *sbrec_encaps
+        = ovsdb_idl_index_create2(ovnsb_idl_loop.idl,
+                                  &sbrec_encap_col_type, &sbrec_encap_col_ip);
 
     ovsdb_idl_track_add_all(ovnsb_idl_loop.idl);
     ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
@@ -7401,7 +7404,7 @@ main(int argc, char *argv[])
                                       sbrec_chassis_private_by_name,
                                       ovs_table, chassis_id,
                                       br_int, &transport_zones,
-                                      &chassis_private);
+                                      &chassis_private, sbrec_encaps);
             }
 
             /* If any OVS feature support changed, force a full recompute.
diff --git a/lib/chassis-index.c b/lib/chassis-index.c
index 4b38036cb..953cc6ed4 100644
--- a/lib/chassis-index.c
+++ b/lib/chassis-index.c
@@ -115,3 +115,23 @@ ha_chassis_group_lookup_by_name(
 
     return retval;
 }
+
+/* Finds and returns the encap with the given ip and type, or NULL if no such
+ * encap exists. */
+const struct sbrec_encap *
+encap_lookup_by_ip_and_type(struct ovsdb_idl_index *sbrec_chassis_encaps,
+                       const char *ip, const char *type)
+{
+    struct sbrec_encap *target = sbrec_encap_index_init_row(
+        sbrec_chassis_encaps);
+
+    sbrec_encap_index_set_ip(target, ip);
+    sbrec_encap_index_set_type(target, type);
+
+    struct sbrec_encap *retval = sbrec_encap_index_find(
+        sbrec_chassis_encaps, target);
+
+    sbrec_encap_index_destroy_row(target);
+
+    return retval;
+}
diff --git a/lib/chassis-index.h b/lib/chassis-index.h
index bc654da13..7161458b2 100644
--- a/lib/chassis-index.h
+++ b/lib/chassis-index.h
@@ -35,5 +35,8 @@ chassis_private_lookup_by_name(
 struct ovsdb_idl_index *ha_chassis_group_index_create(struct ovsdb_idl *idl);
 const struct sbrec_ha_chassis_group *ha_chassis_group_lookup_by_name(
     struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, const char *name);
+const struct sbrec_encap *encap_lookup_by_ip_and_type(
+    struct ovsdb_idl_index *sbrec_chassis_encaps,
+    const char *ip, const char *type);
 
 #endif /* lib/chassis-index.h */
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 7bc1aca59..6713f1a2b 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -277,7 +277,8 @@ ovs-vsctl -- set Open_vSwitch . 
external-ids:hostname="${new_sysid}" \
           -- set Open_vSwitch . external-ids:ovn-remote="${current_remote}"
 
 OVS_WAIT_UNTIL([
-    grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have 
identical values' hv/ovn-controller.log
+    grep -q "'hv' already has encap ip '192.168.0.1' and type 'geneve', " \
+            "cannot duplicate on 'hv-foo'" hv/ovn-controller.log
 ])
 
 # Destroy the stale entries manually and ovn-controller should now be able
@@ -305,12 +306,12 @@ check ovn-sbctl destroy chassis_private . -- destroy 
chassis .
 # - could not create datapath br-int of unknown type foobar
 # - unknown datapath type bar
 # - could not create datapath br-int of unknown type bar
-# - Transaction causes multiple rows ...
 # - failed to create bridge br-int: Address family not supported by protocol # 
due to previous errors.
+# - 'hv' already has encap ip '192.168.0.1' and type 'geneve', cannot 
duplicate on 'hv-foo'
 OVN_CLEANUP_SBOX([hv], ["/foo/d
 /bar/d
-/Transaction causes multiple rows/d
-/failed to create bridge/d"])
+/failed to create bridge/d
+/already has encap ip.*cannot duplicate on/d"])
 
 OVN_CLEANUP_VSWITCH([main])
 as ovn-sb
@@ -3821,3 +3822,30 @@ wait_ports 0
 OVN_CLEANUP([hv1
 /Invalid VXLAN port number.*/d])
 AT_CLEANUP
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - two encaps same IP and type])
+AT_KEYWORDS([ovn])
+ovn_start
+net_add n1
+
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.11
+
+sim_add hv2
+as hv2
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.12
+
+# Set hv2's encap IP to be the same as hv1's.
+check ovs-vsctl set Open_vSwitch . external-ids:ovn-encap-ip="192.168.0.11"
+wait_row_count Encap 0 ip="192.168.0.12"
+
+# Make sure hv2 has been deleted because of the misconfiguration.
+wait_row_count Chassis 0 name="hv2"
+wait_row_count Chassis_Private 0 name="hv2"
+
+AT_CLEANUP
+])
-- 
2.51.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to