Thanks Lorenzo!

Acked-by: Mark Michelson <[email protected]>

On 8/2/24 09:24, Lorenzo Bianconi wrote:
Introduce ECMP_Nexthop table in the SB db in order to track active
ecmp-symmetric-reply connections and flush stale ones.

Signed-off-by: Lorenzo Bianconi <[email protected]>
---
  lib/ovn-util.h           |  2 ++
  northd/en-northd.c       | 35 ++++++++++++++++++++
  northd/en-northd.h       |  4 +++
  northd/inc-proc-northd.c |  7 +++-
  northd/northd.c          | 70 ++++++++++++++++++++++++++++++++++++++++
  northd/northd.h          | 10 ++++++
  ovn-sb.ovsschema         | 18 +++++++++--
  ovn-sb.xml               | 31 ++++++++++++++++++
  tests/ovn-northd.at      |  4 +++
  9 files changed, 178 insertions(+), 3 deletions(-)

diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index ae971ce5a..6963b85dc 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -38,6 +38,8 @@
  #define STT_TUNNEL_OVERHEAD 18
  #define VXLAN_TUNNEL_OVERHEAD 30
+#define ECMP_NEXTHOP_IDS_LEN 65535
+
  struct eth_addr;
  struct nbrec_logical_router_port;
  struct ovsrec_flow_sample_collector_set_table;
diff --git a/northd/en-northd.c b/northd/en-northd.c
index dd2b4353d..6358ae4a5 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -404,6 +404,25 @@ en_bfd_sync_run(struct engine_node *node, void *data)
      engine_set_node_state(node, EN_UPDATED);
  }
+void
+en_ecmp_nexthop_run(struct engine_node *node, void *data)
+{
+    const struct engine_context *eng_ctx = engine_get_context();
+    struct static_routes_data *static_routes_data =
+        engine_get_input_data("static_routes", node);
+    struct ecmp_nexthop_data *enh_data = data;
+    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
+        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
+
+    ecmp_nexthop_destroy(data);
+    ecmp_nexthop_init(data);
+    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
+                             &static_routes_data->parsed_routes,
+                             &enh_data->nexthops,
+                             sbrec_ecmp_nexthop_table);
+    engine_set_node_state(node, EN_UPDATED);
+}
+
  void
  *en_northd_init(struct engine_node *node OVS_UNUSED,
                  struct engine_arg *arg OVS_UNUSED)
@@ -454,6 +473,16 @@ void
      return data;
  }
+void
+*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                      struct engine_arg *arg OVS_UNUSED)
+{
+    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
+
+    ecmp_nexthop_init(data);
+    return data;
+}
+
  void
  en_northd_cleanup(void *data)
  {
@@ -490,3 +519,9 @@ en_bfd_sync_cleanup(void *data)
  {
      bfd_destroy(data);
  }
+
+void
+en_ecmp_nexthop_cleanup(void *data)
+{
+    ecmp_nexthop_destroy(data);
+}
diff --git a/northd/en-northd.h b/northd/en-northd.h
index f2d0ce8ba..48b9f1d7e 100644
--- a/northd/en-northd.h
+++ b/northd/en-northd.h
@@ -41,5 +41,9 @@ bool bfd_sync_northd_change_handler(struct engine_node *node,
                                      void *data OVS_UNUSED);
  void en_bfd_sync_run(struct engine_node *node, void *data);
  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
+void en_ecmp_nexthop_run(struct engine_node *node, void *data);
+void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                           struct engine_arg *arg OVS_UNUSED);
+void en_ecmp_nexthop_cleanup(void *data);
#endif /* EN_NORTHD_H */
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 7d4c28b13..f00ad3643 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -101,7 +101,8 @@ static unixctl_cb_func chassis_features_list;
      SB_NODE(fdb, "fdb") \
      SB_NODE(static_mac_binding, "static_mac_binding") \
      SB_NODE(chassis_template_var, "chassis_template_var") \
-    SB_NODE(logical_dp_group, "logical_dp_group")
+    SB_NODE(logical_dp_group, "logical_dp_group") \
+    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
enum sb_engine_node {
  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -159,6 +160,7 @@ static ENGINE_NODE(route_policies, "route_policies");
  static ENGINE_NODE(static_routes, "static_routes");
  static ENGINE_NODE(bfd, "bfd");
  static ENGINE_NODE(bfd_sync, "bfd_sync");
+static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
                            struct ovsdb_idl_loop *sb)
@@ -258,6 +260,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
      engine_add_input(&en_bfd_sync, &en_northd, 
bfd_sync_northd_change_handler);
+ engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
+    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
+
      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
diff --git a/northd/northd.c b/northd/northd.c
index e0b06ff9a..089795f2c 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -9991,6 +9991,64 @@ build_bfd_map(const struct nbrec_bfd_table 
*nbrec_bfd_table,
      }
  }
+void
+build_ecmp_nexthop_table(
+        struct ovsdb_idl_txn *ovnsb_txn,
+        struct hmap *routes,
+        struct simap *nexthops,
+        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
+{
+    if (!ovnsb_txn) {
+        return;
+    }
+
+    unsigned long *nexthop_ids = bitmap_allocate(ECMP_NEXTHOP_IDS_LEN);
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
+                                       sbrec_ecmp_nexthop_table) {
+        simap_put(nexthops, sb_ecmp_nexthop->nexthop,
+                  sb_ecmp_nexthop->id);
+        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
+    }
+
+    struct sset nb_nexthops_sset = SSET_INITIALIZER(&nb_nexthops_sset);
+
+    struct parsed_route *pr;
+    HMAP_FOR_EACH (pr, key_node, routes) {
+        if (!pr->ecmp_symmetric_reply) {
+            continue;
+        }
+
+        const struct nbrec_logical_router_static_route *r = pr->route;
+        if (!simap_contains(nexthops, r->nexthop)) {
+            int id = bitmap_scan(nexthop_ids, 0, 1, ECMP_NEXTHOP_IDS_LEN);
+            if (id == ECMP_NEXTHOP_IDS_LEN) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+                VLOG_WARN_RL(&rl, "nexthop id address space is exhausted");
+                continue;
+            }
+            bitmap_set1(nexthop_ids, id);
+            simap_put(nexthops, r->nexthop, id);
+
+            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
+            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
+            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
+        }
+        sset_add(&nb_nexthops_sset, r->nexthop);
+    }
+
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
+                                            sbrec_ecmp_nexthop_table) {
+        if (!sset_contains(&nb_nexthops_sset, sb_ecmp_nexthop->nexthop)) {
+            simap_find_and_delete(nexthops, sb_ecmp_nexthop->nexthop);
+            sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
+        }
+    }
+
+    sset_destroy(&nb_nexthops_sset);
+    bitmap_free(nexthop_ids);
+}
+
  /* Returns a string of the IP address of the router port 'op' that
   * overlaps with 'ip_s".  If one is not found, returns NULL.
   *
@@ -17775,6 +17833,12 @@ bfd_init(struct bfd_data *data)
      hmap_init(&data->bfd_connections);
  }
+void
+ecmp_nexthop_init(struct ecmp_nexthop_data *data)
+{
+    simap_init(&data->nexthops);
+}
+
  void
  northd_destroy(struct northd_data *data)
  {
@@ -17856,6 +17920,12 @@ static_routes_destroy(struct static_routes_data *data)
      __bfd_destroy(&data->bfd_active_connections);
  }
+void
+ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
+{
+    simap_destroy(&data->nexthops);
+}
+
  void
  ovnnb_db_run(struct northd_input *input_data,
               struct northd_data *data,
diff --git a/northd/northd.h b/northd/northd.h
index 432072f9f..338dd9335 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -185,6 +185,10 @@ struct bfd_data {
      struct hmap bfd_connections;
  };
+struct ecmp_nexthop_data {
+    struct simap nexthops;
+};
+
  struct lr_nat_table;
struct lflow_input {
@@ -722,6 +726,12 @@ void static_routes_destroy(struct static_routes_data *);
  void bfd_init(struct bfd_data *);
  void bfd_destroy(struct bfd_data *);
+void build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
+                              struct hmap *, struct simap *,
+                              const struct sbrec_ecmp_nexthop_table *);
+void ecmp_nexthop_init(struct ecmp_nexthop_data *);
+void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
+
  struct lflow_table;
  struct lr_stateful_tracked_data;
  struct ls_stateful_tracked_data;
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index b6c051ae6..a4d1630b0 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
  {
      "name": "OVN_Southbound",
-    "version": "20.34.0",
-    "cksum": "2786607656 31376",
+    "version": "20.35.0",
+    "cksum": "1887835491 32037",
      "tables": {
          "SB_Global": {
              "columns": {
@@ -608,6 +608,20 @@
                                            "refTable": "Datapath_Binding"}}}},
              "indexes": [["logical_port", "ip"]],
              "isRoot": true},
+        "ECMP_Nexthop": {
+            "columns": {
+                "nexthop": {"type": "string"},
+                "id": {"type": {"key": {"type": "integer",
+                                        "minInteger": 0,
+                                        "maxInteger": 65535}}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}},
+                "options": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["nexthop"]],
+            "isRoot": true},
          "Chassis_Template_Var": {
              "columns": {
                  "chassis": {"type": "string"},
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 90f113afd..4cc9c9607 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -5160,4 +5160,35 @@ tcp.flags = RST;
        The set of variable values for a given chassis.
      </column>
    </table>
+
+  <table name="ECMP_Nexthop">
+    <p>
+      Each record in this table represents an active ECMP route committed by
+      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.
+      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>
+      to track active ct entries and to flush stale ones.
+    </p>
+    <column name="nexthop">
+      <p>
+        Nexthop IP address for this ECMP route. Nexthop IP address should
+        be the IP address of a connected router port or the IP address of
+        an external device used as nexthop for the given destination.
+      </p>
+    </column>
+
+    <column name="id">
+      <p>
+        Nexthop unique identifier. Nexthop ID is used to track active
+        ecmp-symmetric-reply connections and flush stale ones.
+      </p>
+    </column>
+
+    <column name="options">
+      Reserved for future use.
+    </column>
+
+    <column name="external_ids">
+      See <em>External IDs</em> at the beginning of this document.
+    </column>
+  </table>
  </database>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 710500dce..345dc7c3e 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6754,6 +6754,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
+check_row_count ECMP_Nexthop 1
ovn-sbctl dump-flows lr0 > lr0flows @@ -6771,6 +6772,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
  ])
check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
+check_row_count ECMP_Nexthop 2
ovn-sbctl dump-flows lr0 > lr0flows
  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6800,6 +6802,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | 
ovn_strip_lflows], [0], [
# add ecmp route with wrong nexthop
  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
192.168.1.20
+check_row_count ECMP_Nexthop 2
ovn-sbctl dump-flows lr0 > lr0flows
  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6819,6 +6822,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 
's/192\.168\.0\..0/192.
check ovn-nbctl lr-route-del lr0
  wait_row_count nb:Logical_Router_Static_Route 0
+check_row_count ECMP_Nexthop 0
check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
  ovn-sbctl dump-flows lr0 > lr0flows

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to