When the OVS system-id changes ovn-controller needs external (CMS) help
in order to update its own Chassis/Chassis_private records, i.e., the
CMS has to ensure that either ovn-controller is stopped (so that
ovn-controller cleans up its old Chassis/Chassis_private records) or
that after the system-id is changed, the stale Chassis/Chassis_private
records are destroyed externally.
This patch reverts the previous efforts to have ovn-controller reuse
stale Chassis records and documents how the system-id change operation
needs to be executed. The main problem with reusing stale records is
that there's no safe way to make it work when RBAC is enabled.
Suggestedy-by: Han Zhou <[email protected]>
Reported-at:
https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374608.html
Fixes: f26c4a530bca ("ovn-controller: Fix chassis ovn-sbdb record init")
Fixes: 4465f553ee70 ("ovn-controller: Update stale chassis entry at init")
Fixes: 94a32fca2d2b ("chassis: Fix the way encaps are updated for a chassis
record.")
Fixes: dce1af31b550 ("chassis: Fix chassis_private record updates when the system-id
changes.")
Signed-off-by: Dumitru Ceara <[email protected]>
---
controller/chassis.c | 177 ++++------------------------------------
controller/chassis.h | 3 -
controller/ovn-controller.8.xml | 7 +-
controller/ovn-controller.c | 21 +++--
northd/ovn-northd.c | 6 +-
tests/ovn-controller.at | 44 +++-------
6 files changed, 47 insertions(+), 211 deletions(-)
diff --git a/controller/chassis.c b/controller/chassis.c
index 7748fb9..b4d4b0e 100644
--- a/controller/chassis.c
+++ b/controller/chassis.c
@@ -37,44 +37,6 @@ VLOG_DEFINE_THIS_MODULE(chassis);
#endif /* HOST_NAME_MAX */
/*
- * Structure to hold chassis specific state (currently just chassis-id)
- * to avoid database lookups when changes happen while the controller is
- * running.
- */
-struct chassis_info {
- /* Last ID we initialized the Chassis SB record with. */
- struct ds id;
-
- /* True if Chassis SB record is initialized, false otherwise. */
- uint32_t id_inited : 1;
-};
-
-static struct chassis_info chassis_state = {
- .id = DS_EMPTY_INITIALIZER,
- .id_inited = false,
-};
-
-static void
-chassis_info_set_id(struct chassis_info *info, const char *id)
-{
- ds_clear(&info->id);
- ds_put_cstr(&info->id, id);
- info->id_inited = true;
-}
-
-static bool
-chassis_info_id_inited(const struct chassis_info *info)
-{
- return info->id_inited;
-}
-
-static const char *
-chassis_info_id(const struct chassis_info *info)
-{
- return ds_cstr_ro(&info->id);
-}
-
-/*
* Structure for storing the chassis config parsed from the ovs table.
*/
struct ovs_chassis_cfg {
@@ -420,6 +382,9 @@ chassis_tunnels_changed(const struct sset *encap_type_set,
bool changed = false;
for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
+ if (strcmp(chassis_rec->name, chassis_rec->encaps[i]->chassis_name)) {
+ return true;
+ }
if (!sset_contains(encap_type_set, chassis_rec->encaps[i]->type)) {
changed = true;
@@ -500,54 +465,11 @@ chassis_build_encaps(struct ovsdb_idl_txn *ovnsb_idl_txn,
return encaps;
}
-/*
- * Updates encaps for a given chassis. This can happen when the chassis
- * name has changed. Also, the only thing we support updating is the
- * chassis_name. For other changes the encaps will be recreated.
- */
-static void
-chassis_update_encaps(const struct sbrec_chassis *chassis)
-{
- for (size_t i = 0; i < chassis->n_encaps; i++) {
- sbrec_encap_set_chassis_name(chassis->encaps[i], chassis->name);
- }
-}
-
-/*
- * Returns a pointer to a chassis record from 'chassis_table' that
- * matches at least one tunnel config.
- */
-static const struct sbrec_chassis *
-chassis_get_stale_record(const struct sbrec_chassis_table *chassis_table,
- const struct ovs_chassis_cfg *ovs_cfg,
- const char *chassis_id)
-{
- const struct sbrec_chassis *chassis_rec;
-
- SBREC_CHASSIS_TABLE_FOR_EACH (chassis_rec, chassis_table) {
- for (size_t i = 0; i < chassis_rec->n_encaps; i++) {
- if (sset_contains(&ovs_cfg->encap_type_set,
- chassis_rec->encaps[i]->type) &&
- sset_contains(&ovs_cfg->encap_ip_set,
- chassis_rec->encaps[i]->ip)) {
- return chassis_rec;
- }
- if (strcmp(chassis_rec->name, chassis_id) == 0) {
- return chassis_rec;
- }
- }
- }
-
- return NULL;
-}
-
/* If this is a chassis config update after we initialized the record once
* then we should always be able to find it with the ID we saved in
* chassis_state.
* Otherwise (i.e., first time we create the record or if the system-id
- * changed) then we check if there's a stale record from a previous
- * controller run that didn't end gracefully and reuse it. If not then we
- * create a new record.
+ * changed) we create a new record.
*
* Sets '*chassis_rec' to point to the local chassis record.
* Returns true if this record was already in the database, false if it was
@@ -556,33 +478,16 @@ chassis_get_stale_record(const struct sbrec_chassis_table
*chassis_table,
static bool
chassis_get_record(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_chassis_by_name,
- const struct sbrec_chassis_table *chassis_table,
- const struct ovs_chassis_cfg *ovs_cfg,
const char *chassis_id,
const struct sbrec_chassis **chassis_rec)
{
- const struct sbrec_chassis *chassis = NULL;
-
- if (chassis_info_id_inited(&chassis_state)) {
- chassis = chassis_lookup_by_name(sbrec_chassis_by_name,
- chassis_info_id(&chassis_state));
- if (!chassis) {
- VLOG_DBG("Could not find Chassis, will check if the id changed: "
- "stored (%s) ovs (%s)",
- chassis_info_id(&chassis_state), chassis_id);
- }
- }
-
- if (!chassis) {
- chassis = chassis_get_stale_record(chassis_table, ovs_cfg, chassis_id);
- }
+ const struct sbrec_chassis *chassis =
+ chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
- if (!chassis) {
- /* Recreate the chassis record. */
+ if (!chassis && ovnsb_idl_txn) {
+ /* Create the chassis record. */
VLOG_DBG("Could not find Chassis, will create it: %s", chassis_id);
- if (ovnsb_idl_txn) {
- *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
- }
+ *chassis_rec = sbrec_chassis_insert(ovnsb_idl_txn);
return false;
}
@@ -650,7 +555,6 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
&ovs_cfg->encap_ip_set, ovs_cfg->encap_csum,
chassis_rec);
if (!tunnels_changed) {
- chassis_update_encaps(chassis_rec);
return updated;
}
@@ -666,33 +570,11 @@ chassis_update(const struct sbrec_chassis *chassis_rec,
return true;
}
-/*
- * Returns a pointer to a chassis_private record from 'chassis_pvt_table' that
- * matches the chassis record.
- */
-static const struct sbrec_chassis_private *
-chassis_private_get_stale_record(
- const struct sbrec_chassis_private_table *chassis_pvt_table,
- const struct sbrec_chassis *chassis)
-{
- const struct sbrec_chassis_private *chassis_pvt_rec;
-
- SBREC_CHASSIS_PRIVATE_TABLE_FOR_EACH (chassis_pvt_rec, chassis_pvt_table) {
- if (chassis_pvt_rec->chassis == chassis) {
- return chassis_pvt_rec;
- }
- }
-
- return NULL;
-}
-
/* If this is a chassis_private config update after we initialized the record
* once then we should always be able to find it with the ID we saved in
* chassis_state.
* Otherwise (i.e., first time we created the chassis record or if the
- * system-id changed) then we check if there's a stale record from a previous
- * controller run that didn't end gracefully and reuse it. If not then we
- * create a new record.
+ * system-id changed) we create a new record.
*
* Returns the local chassis record.
*/
@@ -700,21 +582,11 @@ static const struct sbrec_chassis_private *
chassis_private_get_record(
struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_chassis_pvt_by_name,
- const struct sbrec_chassis_private_table *chassis_pvt_table,
- const struct sbrec_chassis *chassis)
+ const char *chassis_id)
{
- const struct sbrec_chassis_private *chassis_p = NULL;
-
- if (chassis_info_id_inited(&chassis_state)) {
- chassis_p =
+ const struct sbrec_chassis_private *chassis_p =
chassis_private_lookup_by_name(sbrec_chassis_pvt_by_name,
- chassis_info_id(&chassis_state));
- }
-
- if (!chassis_p) {
- chassis_p = chassis_private_get_stale_record(chassis_pvt_table,
- chassis);
- }
+ chassis_id);
if (!chassis_p && ovnsb_idl_txn) {
return sbrec_chassis_private_insert(ovnsb_idl_txn);
@@ -743,8 +615,6 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
struct ovsdb_idl_index *sbrec_chassis_by_name,
struct ovsdb_idl_index *sbrec_chassis_private_by_name,
const struct ovsrec_open_vswitch_table *ovs_table,
- const struct sbrec_chassis_table *chassis_table,
- const struct sbrec_chassis_private_table *chassis_pvt_table,
const char *chassis_id,
const struct ovsrec_bridge *br_int,
const struct sset *transport_zones,
@@ -762,8 +632,7 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
const struct sbrec_chassis *chassis_rec = NULL;
bool existed = chassis_get_record(ovnsb_idl_txn, sbrec_chassis_by_name,
- chassis_table, &ovs_cfg, chassis_id,
- &chassis_rec);
+ chassis_id, &chassis_rec);
/* If we found (or created) a record, update it with the correct config
* and store the current chassis_id for fast lookup in case it gets
@@ -783,13 +652,10 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
*chassis_private =
chassis_private_get_record(ovnsb_idl_txn,
sbrec_chassis_private_by_name,
- chassis_pvt_table, chassis_rec);
-
+ chassis_id);
if (*chassis_private) {
chassis_private_update(*chassis_private, chassis_rec, chassis_id);
}
-
- chassis_info_set_id(&chassis_state, chassis_id);
}
ovs_chassis_cfg_destroy(&ovs_cfg);
@@ -865,16 +731,3 @@ chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
}
return false;
}
-
-/*
- * Returns the last initialized chassis-id.
- */
-const char *
-chassis_get_id(void)
-{
- if (chassis_info_id_inited(&chassis_state)) {
- return chassis_info_id(&chassis_state);
- }
-
- return NULL;
-}
diff --git a/controller/chassis.h b/controller/chassis.h
index 220f726..18b45a1 100644
--- a/controller/chassis.h
+++ b/controller/chassis.h
@@ -37,8 +37,6 @@ const struct sbrec_chassis *chassis_run(
struct ovsdb_idl_index *sbrec_chassis_by_name,
struct ovsdb_idl_index *sbrec_chassis_private_by_name,
const struct ovsrec_open_vswitch_table *,
- const struct sbrec_chassis_table *,
- const struct sbrec_chassis_private_table *,
const char *chassis_id, const struct ovsrec_bridge *br_int,
const struct sset *transport_zones,
const struct sbrec_chassis_private **chassis_private);
@@ -48,7 +46,6 @@ bool chassis_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
bool chassis_get_mac(const struct sbrec_chassis *chassis,
const char *bridge_mapping,
struct eth_addr *chassis_mac);
-const char *chassis_get_id(void);
const char * get_chassis_mac_mappings(const struct smap *ext_ids);
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index 6f3d388..29833c7 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -68,7 +68,12 @@
</p>
<dl>
<dt><code>external_ids:system-id</code></dt>
- <dd>The chassis name to use in the Chassis table.</dd>
+ <dd>The chassis name to use in the Chassis table. Changing the
+ <code>system-id</code> while <code>ovn-controller</code> is running is
+ not directly supported. Users have two options: either first
+ gracefully stop <code>ovn-controller</code> or manually delete the
+ stale <code>Chassis</code> and <code>Chassis_Private</code> records
+ after changing the <code>system-id</code>.</dd>
<dt><code>external_ids:hostname</code></dt>
<dd>The hostname to use in the Chassis table.</dd>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 90a0d9c..b5f0c13 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1777,7 +1777,7 @@ static void init_physical_ctx(struct engine_node *node,
(struct ovsrec_bridge_table *)EN_OVSDB_GET(
engine_get_input("OVS_bridge", node));
const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
- const char *chassis_id = chassis_get_id();
+ const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis = NULL;
struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
@@ -1867,7 +1867,11 @@ static void init_lflow_ctx(struct engine_node *node,
(struct sbrec_load_balancer_table *)EN_OVSDB_GET(
engine_get_input("SB_load_balancer", node));
- const char *chassis_id = chassis_get_id();
+ struct ovsrec_open_vswitch_table *ovs_table =
+ (struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+ engine_get_input("OVS_open_vswitch", node));
+
+ const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis = NULL;
struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
@@ -1961,7 +1965,7 @@ en_flow_output_run(struct engine_node *node, void *data)
(struct ovsrec_bridge_table *)EN_OVSDB_GET(
engine_get_input("OVS_bridge", node));
const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
- const char *chassis_id = chassis_get_id();
+ const char *chassis_id = get_ovs_chassis_id(ovs_table);
struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
@@ -2145,7 +2149,7 @@ _flow_output_resource_ref_handler(struct engine_node
*node, void *data,
(struct ovsrec_bridge_table *)EN_OVSDB_GET(
engine_get_input("OVS_bridge", node));
const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
- const char *chassis_id = chassis_get_id();
+ const char *chassis_id = get_ovs_chassis_id(ovs_table);
struct ovsdb_idl_index *sbrec_chassis_by_name =
engine_ovsdb_node_get_index(
@@ -2770,18 +2774,13 @@ main(int argc, char *argv[])
get_transport_zones(ovsrec_open_vswitch_table_get(
ovs_idl_loop.idl)), ",");
- const struct sbrec_chassis_table *chassis_table =
- sbrec_chassis_table_get(ovnsb_idl_loop.idl);
- const struct sbrec_chassis_private_table *chassis_pvt_table =
- sbrec_chassis_private_table_get(ovnsb_idl_loop.idl);
const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis = NULL;
const struct sbrec_chassis_private *chassis_private = NULL;
if (chassis_id) {
chassis = chassis_run(ovnsb_idl_txn, sbrec_chassis_by_name,
sbrec_chassis_private_by_name,
- ovs_table, chassis_table,
- chassis_pvt_table, chassis_id,
+ ovs_table, chassis_id,
br_int, &transport_zones,
&chassis_private);
}
@@ -3020,7 +3019,7 @@ loop_done:
const struct ovsrec_bridge *br_int = get_br_int(bridge_table,
ovs_table);
- const char *chassis_id = chassis_get_id();
+ const char *chassis_id = get_ovs_chassis_id(ovs_table);
const struct sbrec_chassis *chassis
= (chassis_id
? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 9572423..8dc41ab 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -12637,17 +12637,17 @@ static const char *rbac_chassis_auth[] =
{"name"};
static const char *rbac_chassis_update[] =
{"nb_cfg", "external_ids", "encaps", "vtep_logical_switches",
- "other_config", "name"};
+ "other_config"};
static const char *rbac_chassis_private_auth[] =
{"name"};
static const char *rbac_chassis_private_update[] =
- {"nb_cfg", "nb_cfg_timestamp", "chassis", "name"};
+ {"nb_cfg", "nb_cfg_timestamp", "chassis"};
static const char *rbac_encap_auth[] =
{"chassis_name"};
static const char *rbac_encap_update[] =
- {"type", "options", "ip", "chassis_name"};
+ {"type", "options", "ip"};
static const char *rbac_port_binding_auth[] =
{""};
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index a1f35fb..1b46799 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -187,45 +187,27 @@ OVS_WAIT_UNTIL([
test "${expected_iface_types}" = "${chassis_iface_types}"
])
-# Change the value of external_ids:system-id and make sure it's mirrored
-# in the Chassis record in the OVN_Southbound database.
+# Change the value of external_ids:system-id.
+# This requires operator intervention and removal of the stale chassis and
+# chassis_private records. Until that happens ovn-controller fails to
+# create the records due to constraint violation on the Encap table.
sysid=${sysid}-foo
ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
+
OVS_WAIT_UNTIL([
- chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
- test "${sysid}" = "${chassis_id}"
-])
-OVS_WAIT_UNTIL([
- chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
- test "${sysid}" = "${chassis_id}"
+ grep -q 'Transaction causes multiple rows in \\"Encap\\" table to have identical values (geneve and
\\"192.168.0.1\\") for index on columns \\"type\\" and \\"ip\\".' hv/ovn-controller.log
])
-# Only one Chassis_Private record should exist.
-wait_row_count Chassis_Private 1
+# Destroy the stale entries manually and ovn-controller should now be able
+# to create new ones.
+check ovn-sbctl destroy chassis_private . -- destroy chassis .
-# Simulate system-id changing while ovn-controller is disconnected from the
-# SB.
-valid_remote=$(ovs-vsctl get Open_vSwitch . external_ids:ovn-remote)
-invalid_remote=tcp:0.0.0.0:4242
-ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${invalid_remote}
-expected_state="not connected"
-OVS_WAIT_UNTIL([
- test "${expected_state}" = "$(ovn-appctl -t ovn-controller
connection-status)"
-])
-sysid=${sysid}-bar
-ovs-vsctl set Open_vSwitch . external-ids:system-id="${sysid}"
-ovs-vsctl set Open_vSwitch . external_ids:ovn-remote=${valid_remote}
-OVS_WAIT_UNTIL([
- chassis_id=$(ovn-sbctl get Chassis "${sysid}" name)
- test "${sysid}" = "${chassis_id}"
-])
-OVS_WAIT_UNTIL([
- chassis_id=$(ovn-sbctl get Chassis_Private "${sysid}" name)
- test "${sysid}" = "${chassis_id}"
-])
+wait_row_count Chassis_Private 1 name=${sysid}
+wait_row_count Chassis 1 name=${sysid}
-# Only one Chassis_Private record should exist.
+# Only one Chassis_Private/Chassis record should exist.
wait_row_count Chassis_Private 1
+wait_row_count Chassis 1
# Gracefully terminate daemons
OVN_CLEANUP_SBOX([hv])