nb_cfg as a mechanism to "ping" OVN control plane is very useful
in many ways. However, the current implementation will trigger
update notifications flooding in the whole control plane. Each
HV updates to SB the nb_cfg number and all these updates are
notified to all the other HVs, which is O(n^2). Although updates
are batched in fewers notifications than n^2, it still generates
significant load on SB DB and also on ovn-controllers.

To solve this problem and make the mechanism more useful in large
scale producation deployment, this patch separates the per HV
nb_cfg data in SB from the Chassis table to a separate table, so
that each HV can conditionally monitor and get updates only for
its own record.

Test result shows great improvement:
In a test environment with 1K sandbox HVs, and 10K ports created
on 40 lswitches and 8 lrouters, do the sync test when the system
is idle, with command:

    time ovn-nbctl --wait=hv sync

Original result:
real    4m52.926s
user    0m0.328s
sys     0m0.004s

With this patch:
real    0m11.405s
user    0m0.316s
sys     0m0.016s

Signed-off-by: Han Zhou <hzh...@ebay.com>
---

Notes:
    v1 -> v2:
    - keep the old column in chassis table to avoid breaking upgrading
    - avoid the attempt of adding redundant entry during startup

 ovn/controller/chassis.c        | 35 ++++++++++++++++++++++++++++++++++-
 ovn/controller/chassis.h        |  9 ++++++---
 ovn/controller/ovn-controller.c | 21 ++++++++++++++++-----
 ovn/northd/ovn-northd.c         | 21 ++++++++++++++++++---
 ovn/ovn-sb.ovsschema            |  8 +++++++-
 ovn/ovn-sb.xml                  | 26 ++++++++++++++++++++++----
 6 files changed, 103 insertions(+), 17 deletions(-)

diff --git a/ovn/controller/chassis.c b/ovn/controller/chassis.c
index 6b5286a..6092dcc 100644
--- a/ovn/controller/chassis.c
+++ b/ovn/controller/chassis.c
@@ -72,12 +72,28 @@ get_cms_options(const struct smap *ext_ids)
     return smap_get_def(ext_ids, "ovn-cms-options", "");
 }
 
+static const struct sbrec_chassis_nb_cfg *
+find_chassis_nb_cfg(struct ovsdb_idl *ovnsb_idl, const char *chassis_id)
+{
+    const struct sbrec_chassis_nb_cfg *chassis_nb_cfg_rec;
+
+    SBREC_CHASSIS_NB_CFG_FOR_EACH (chassis_nb_cfg_rec, ovnsb_idl) {
+        if (!strcmp(chassis_nb_cfg_rec->chassis_name, chassis_id)) {
+            break;
+        }
+    }
+
+    return chassis_nb_cfg_rec;
+}
+
 /* Returns this chassis's Chassis record, if it is available and is currently
  * amenable to a transaction. */
 const struct sbrec_chassis *
 chassis_run(struct controller_ctx *ctx, const char *chassis_id,
-            const struct ovsrec_bridge *br_int)
+            const struct ovsrec_bridge *br_int,
+            const struct sbrec_chassis_nb_cfg **nb_cfg)
 {
+    *nb_cfg = NULL;
     if (!ctx->ovnsb_idl_txn) {
         return NULL;
     }
@@ -135,8 +151,17 @@ chassis_run(struct controller_ctx *ctx, const char 
*chassis_id,
     ds_chomp(&iface_types, ',');
     const char *iface_types_str = ds_cstr(&iface_types);
 
+    const struct sbrec_chassis_nb_cfg *chassis_nb_cfg_rec
+        = find_chassis_nb_cfg(ctx->ovnsb_idl, chassis_id);
+    if (!chassis_nb_cfg_rec) {
+        chassis_nb_cfg_rec = sbrec_chassis_nb_cfg_insert(ctx->ovnsb_idl_txn);
+        sbrec_chassis_nb_cfg_set_chassis_name(chassis_nb_cfg_rec, chassis_id);
+    }
+    *nb_cfg = chassis_nb_cfg_rec;
+
     const struct sbrec_chassis *chassis_rec
         = get_chassis(ctx->ovnsb_idl, chassis_id);
+
     const char *encap_csum = smap_get_def(&cfg->external_ids,
                                           "ovn-encap-csum", "true");
     if (chassis_rec) {
@@ -256,6 +281,14 @@ chassis_cleanup(struct controller_ctx *ctx,
                                   "ovn-controller: unregistering chassis '%s'",
                                   chassis_rec->name);
         sbrec_chassis_delete(chassis_rec);
+        const struct sbrec_chassis_nb_cfg *chassis_nb_cfg_rec
+            = find_chassis_nb_cfg(ctx->ovnsb_idl, chassis_rec->name);
+        if (chassis_nb_cfg_rec) {
+            sbrec_chassis_nb_cfg_delete(chassis_nb_cfg_rec);
+        } else {
+            VLOG_WARN("Chassis_NB_Cfg record didn't exist for chassis '%s'",
+                      chassis_rec->name);
+        }
     }
     return false;
 }
diff --git a/ovn/controller/chassis.h b/ovn/controller/chassis.h
index 2cc47fc..5ecf7f4 100644
--- a/ovn/controller/chassis.h
+++ b/ovn/controller/chassis.h
@@ -17,6 +17,7 @@
 #define OVN_CHASSIS_H 1
 
 #include <stdbool.h>
+#include "ovn/lib/ovn-sb-idl.h"
 
 struct controller_ctx;
 struct ovsdb_idl;
@@ -24,9 +25,11 @@ struct ovsrec_bridge;
 struct sbrec_chassis;
 
 void chassis_register_ovs_idl(struct ovsdb_idl *);
-const struct sbrec_chassis *chassis_run(struct controller_ctx *,
-                                        const char *chassis_id,
-                                        const struct ovsrec_bridge *br_int);
+const struct sbrec_chassis *chassis_run(
+            struct controller_ctx *,
+            const char *chassis_id,
+            const struct ovsrec_bridge *br_int,
+            const struct sbrec_chassis_nb_cfg **nb_cfg);
 bool chassis_cleanup(struct controller_ctx *, const struct sbrec_chassis *);
 
 #endif /* ovn/chassis.h */
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 7592bda..b2eb03b 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -150,6 +150,7 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     struct ovsdb_idl_condition mb = OVSDB_IDL_CONDITION_INIT(&mb);
     struct ovsdb_idl_condition mg = OVSDB_IDL_CONDITION_INIT(&mg);
     struct ovsdb_idl_condition dns = OVSDB_IDL_CONDITION_INIT(&dns);
+    struct ovsdb_idl_condition nbcfg = OVSDB_IDL_CONDITION_INIT(&nbcfg);
     sbrec_port_binding_add_clause_type(&pb, OVSDB_F_EQ, "patch");
     /* XXX: We can optimize this, if we find a way to only monitor
      * ports that have a Gateway_Chassis that point's to our own
@@ -172,6 +173,12 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l2);
         const struct smap l3 = SMAP_CONST1(&l3, "l3gateway-chassis", id);
         sbrec_port_binding_add_clause_options(&pb, OVSDB_F_INCLUDES, &l3);
+
+        /* Monitors Chassis_NB_Cfg record for current chassis only */
+        sbrec_chassis_nb_cfg_add_clause_chassis_name(&nbcfg, OVSDB_F_EQ,
+                                                     chassis->name);
+    } else {
+        ovsdb_idl_condition_add_clause_true(&nbcfg);
     }
     if (local_ifaces) {
         const char *name;
@@ -198,11 +205,13 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
     sbrec_mac_binding_set_condition(ovnsb_idl, &mb);
     sbrec_multicast_group_set_condition(ovnsb_idl, &mg);
     sbrec_dns_set_condition(ovnsb_idl, &dns);
+    sbrec_chassis_nb_cfg_set_condition(ovnsb_idl, &nbcfg);
     ovsdb_idl_condition_destroy(&pb);
     ovsdb_idl_condition_destroy(&lf);
     ovsdb_idl_condition_destroy(&mb);
     ovsdb_idl_condition_destroy(&mg);
     ovsdb_idl_condition_destroy(&dns);
+    ovsdb_idl_condition_destroy(&nbcfg);
 }
 
 static const struct ovsrec_bridge *
@@ -621,7 +630,7 @@ main(int argc, char *argv[])
     create_ovnsb_indexes(ovnsb_idl_loop.idl);
     lport_init(ovnsb_idl_loop.idl);
 
-    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
+    ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_chassis_nb_cfg_col_nb_cfg);
     update_sb_monitors(ovnsb_idl_loop.idl, NULL, NULL, NULL);
     ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl);
 
@@ -685,8 +694,9 @@ main(int argc, char *argv[])
         chassis_index_init(&chassis_index, ctx.ovnsb_idl);
 
         const struct sbrec_chassis *chassis = NULL;
+        const struct sbrec_chassis_nb_cfg *chassis_nb_cfg = NULL;
         if (chassis_id) {
-            chassis = chassis_run(&ctx, chassis_id, br_int);
+            chassis = chassis_run(&ctx, chassis_id, br_int, &chassis_nb_cfg);
             encaps_run(&ctx, br_int, chassis_id);
             bfd_calculate_active_tunnels(br_int, &active_tunnels);
             binding_run(&ctx, br_int, chassis,
@@ -730,10 +740,11 @@ main(int argc, char *argv[])
 
                     hmap_destroy(&flow_table);
                 }
-                if (ctx.ovnsb_idl_txn) {
+                if (ctx.ovnsb_idl_txn && chassis_nb_cfg) {
                     int64_t cur_cfg = ofctrl_get_cur_cfg();
-                    if (cur_cfg && cur_cfg != chassis->nb_cfg) {
-                        sbrec_chassis_set_nb_cfg(chassis, cur_cfg);
+                    if (cur_cfg && cur_cfg != chassis_nb_cfg->nb_cfg) {
+                        sbrec_chassis_nb_cfg_set_nb_cfg(chassis_nb_cfg,
+                                                        cur_cfg);
                     }
                 }
             }
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 3963810..9f4e0bc 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -6483,6 +6483,11 @@ static const char *rbac_chassis_auth[] =
 static const char *rbac_chassis_update[] =
     {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};
 
+static const char *rbac_chassis_nb_cfg_auth[] =
+    {"chassis_name"};
+static const char *rbac_chassis_nb_cfg_update[] =
+    {"nb_cfg"};
+
 static const char *rbac_encap_auth[] =
     {"chassis_name"};
 static const char *rbac_encap_update[] =
@@ -6516,6 +6521,14 @@ static struct rbac_perm_cfg {
         .n_update = ARRAY_SIZE(rbac_chassis_update),
         .row = NULL
     },{
+        .table = "Chassis_NB_Cfg",
+        .auth = rbac_chassis_nb_cfg_auth,
+        .n_auth = ARRAY_SIZE(rbac_chassis_nb_cfg_auth),
+        .insdel = true,
+        .update = rbac_chassis_nb_cfg_update,
+        .n_update = ARRAY_SIZE(rbac_chassis_nb_cfg_update),
+        .row = NULL
+    },{
         .table = "Encap",
         .auth = rbac_encap_auth,
         .n_auth = ARRAY_SIZE(rbac_encap_auth),
@@ -6676,9 +6689,9 @@ update_northbound_cfg(struct northd_context *ctx,
     /* Update northbound hv_cfg if appropriate. */
     if (nbg) {
         /* Find minimum nb_cfg among all chassis. */
-        const struct sbrec_chassis *chassis;
+        const struct sbrec_chassis_nb_cfg *chassis;
         int64_t hv_cfg = nbg->nb_cfg;
-        SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
+        SBREC_CHASSIS_NB_CFG_FOR_EACH (chassis, ctx->ovnsb_idl) {
             if (chassis->nb_cfg < hv_cfg) {
                 hv_cfg = chassis->nb_cfg;
             }
@@ -6911,9 +6924,11 @@ main(int argc, char *argv[])
     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);
 
     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
-    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
 
+    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis_nb_cfg);
+    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_nb_cfg_col_nb_cfg);
+
     /* Ensure that only a single ovn-northd is active in the deployment by
      * acquiring a lock called "ovn_northd" on the southbound database
      * and then only performing DB transactions if the lock is held. */
diff --git a/ovn/ovn-sb.ovsschema b/ovn/ovn-sb.ovsschema
index 2abcc6b..39be87d 100644
--- a/ovn/ovn-sb.ovsschema
+++ b/ovn/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
 {
     "name": "OVN_Southbound",
     "version": "1.15.0",
-    "cksum": "70426956 13327",
+    "cksum": "1669322676 13561",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -36,6 +36,12 @@
                              "min": 0, "max": "unlimited"}}},
             "isRoot": true,
             "indexes": [["name"]]},
+        "Chassis_NB_Cfg": {
+            "columns": {
+                "chassis_name": {"type": "string"},
+                "nb_cfg": {"type": {"key": "integer"}}},
+            "isRoot": true,
+            "indexes": [["chassis_name"]]},
         "Encap": {
             "columns": {
                 "type": {"type": {"key": {
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 6a8b818..40e94b5 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -212,10 +212,8 @@
     </column>
 
     <column name="nb_cfg">
-      Sequence number for the configuration.  When <code>ovn-controller</code>
-      updates the configuration of a chassis from the contents of the
-      southbound database, it copies <ref table="SB_Global" column="nb_cfg"/>
-      from the <ref table="SB_Global"/> table into this column.
+      Deprecated. This column is replaced by the <ref table="Chassis_NB_Cfg"
+      column="nb_cfg"/> column of the <ref table="Chassis_NB_Cfg"/> table.
     </column>
 
     <column name="external_ids" key="ovn-bridge-mappings">
@@ -291,6 +289,26 @@
     </group>
   </table>
 
+  <table name="Chassis_NB_Cfg" title="Chassis NB Config">
+    <p>
+      Each row in this table maintains the nb_cfg number that the corresponding
+      chassis with <ref table="Chassis" column="name"/> in <ref 
table="Chassis"/>
+      has processed. This information is stored in this separate table for
+      performance considerations.
+    </p>
+
+    <column name="nb_cfg">
+      Sequence number for the configuration.  When <code>ovn-controller</code>
+      updates the configuration of a chassis from the contents of the
+      southbound database, it copies <ref table="SB_Global" column="nb_cfg"/>
+      from the <ref table="SB_Global"/> table into this column.
+    </column>
+
+    <column name="chassis_name">
+      The name of the chassis that created this encap.
+    </column>
+  </table>
+
   <table name="Encap" title="Encapsulation Types">
     <p>
       The <ref column="encaps" table="Chassis"/> column in the <ref
-- 
2.1.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to