Hi Lorenzo,

On 9/19/24 19:08, Lorenzo Bianconi wrote:
> Introduce ECMP_Nexthop table in the SB db in order to track active
> ecmp-symmetric-reply connections and flush stale ones. ECMP_Nexthop
> table contains ip and mac address for each active nexthop.
> 
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
>  northd/en-northd.c       | 27 ++++++++++++++++++++
>  northd/en-northd.h       |  4 +++
>  northd/inc-proc-northd.c |  8 +++++-
>  northd/northd.c          | 53 +++++++++++++++++++++++++++++++++++++++-
>  northd/northd.h          |  4 +++
>  ovn-sb.ovsschema         | 17 +++++++++++--
>  ovn-sb.xml               | 34 ++++++++++++++++++++++++++
>  tests/ovn-northd.at      | 18 ++++++++------
>  8 files changed, 154 insertions(+), 11 deletions(-)
> 
> diff --git a/northd/en-northd.c b/northd/en-northd.c
> index 24ed31517..b8227617b 100644
> --- a/northd/en-northd.c
> +++ b/northd/en-northd.c
> @@ -404,6 +404,21 @@ en_bfd_sync_run(struct engine_node *node, void *data)
>      engine_set_node_state(node, EN_UPDATED);
>  }
>  
> +void
> +en_ecmp_nexthop_run(struct engine_node *node, void *data OVS_UNUSED)
> +{
> +    const struct engine_context *eng_ctx = engine_get_context();
> +    struct static_routes_data *static_routes_data =
> +        engine_get_input_data("static_routes", node);
> +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> +
> +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> +                             &static_routes_data->parsed_routes,
> +                             sbrec_ecmp_nexthop_table);
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
>  void
>  *en_northd_init(struct engine_node *node OVS_UNUSED,
>                  struct engine_arg *arg OVS_UNUSED)
> @@ -454,6 +469,13 @@ void
>      return data;
>  }
>  
> +void
> +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,

Nit 'void *' on the same line.

> +                      struct engine_arg *arg OVS_UNUSED)
> +{
> +    return NULL;
> +}
> +
>  void
>  en_northd_cleanup(void *data)
>  {
> @@ -526,3 +548,8 @@ en_bfd_sync_cleanup(void *data)
>  {
>      bfd_sync_destroy(data);
>  }
> +
> +void
> +en_ecmp_nexthop_cleanup(void *data OVS_UNUSED)
> +{
> +}
> diff --git a/northd/en-northd.h b/northd/en-northd.h
> index 631a7c17a..2666cc67e 100644
> --- a/northd/en-northd.h
> +++ b/northd/en-northd.h
> @@ -42,5 +42,9 @@ bool bfd_sync_northd_change_handler(struct engine_node 
> *node,
>                                      void *data OVS_UNUSED);
>  void en_bfd_sync_run(struct engine_node *node, void *data);
>  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                           struct engine_arg *arg OVS_UNUSED);
> +void en_ecmp_nexthop_cleanup(void *data);
>  
>  #endif /* EN_NORTHD_H */
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index 1f79916a5..cdc080ef0 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list;
>      SB_NODE(fdb, "fdb") \
>      SB_NODE(static_mac_binding, "static_mac_binding") \
>      SB_NODE(chassis_template_var, "chassis_template_var") \
> -    SB_NODE(logical_dp_group, "logical_dp_group")
> +    SB_NODE(logical_dp_group, "logical_dp_group") \
> +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
>  
>  enum sb_engine_node {
>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> @@ -162,6 +163,7 @@ static ENGINE_NODE(route_policies, "route_policies");
>  static ENGINE_NODE(static_routes, "static_routes");
>  static ENGINE_NODE(bfd, "bfd");
>  static ENGINE_NODE(bfd_sync, "bfd_sync");
> +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
>  
>  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>                            struct ovsdb_idl_loop *sb)
> @@ -264,6 +266,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
>      engine_add_input(&en_bfd_sync, &en_northd, 
> bfd_sync_northd_change_handler);
>  
> +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> +
>      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
>      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
>      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> @@ -334,6 +339,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_sync_from_sb, &en_sb_port_binding, NULL);
>      engine_add_input(&en_sync_from_sb, &en_sb_ha_chassis_group, NULL);
>  
> +    engine_add_input(&en_northd_output, &en_ecmp_nexthop, NULL);
>      engine_add_input(&en_northd_output, &en_sync_from_sb, NULL);
>      engine_add_input(&en_northd_output, &en_sync_to_sb,
>                       northd_output_sync_to_sb_handler);
> diff --git a/northd/northd.c b/northd/northd.c
> index a267cd5f8..3e3bb84ef 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10665,6 +10665,55 @@ build_bfd_map(const struct nbrec_bfd_table 
> *nbrec_bfd_table,
>      }
>  }
>  
> +void
> +build_ecmp_nexthop_table(
> +        struct ovsdb_idl_txn *ovnsb_txn,
> +        struct hmap *routes,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> +{
> +    if (!ovnsb_txn) {
> +        return;
> +    }
> +
> +    struct sset nb_nexthops_sset = SSET_INITIALIZER(&nb_nexthops_sset);
> +    struct sset sb_nexthops_sset = SSET_INITIALIZER(&sb_nexthops_sset);
> +
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> +                                       sbrec_ecmp_nexthop_table) {
> +        sset_add(&sb_nexthops_sset, sb_ecmp_nexthop->nexthop);
> +    }

This doesn't take into account datapath in any way.  See my comment
about adding "Datapath_Binding/Port_Binding" references to the
SB.ECMP_Next_Hop records.

> +
> +    struct parsed_route *pr;
> +    HMAP_FOR_EACH (pr, key_node, routes) {
> +        if (!pr->ecmp_symmetric_reply) {
> +            continue;
> +        }
> +
> +        if (!pr->out_port) {
> +            continue;
> +        }
> +
> +        const struct nbrec_logical_router_static_route *r = pr->route;
> +        if (!sset_contains(&sb_nexthops_sset, r->nexthop)) {
> +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> +            sbrec_ecmp_nexthop_set_port(sb_ecmp_nexthop, pr->out_port->key);
> +        }
> +        sset_add(&nb_nexthops_sset, r->nexthop);
> +    }
> +
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
> +                                            sbrec_ecmp_nexthop_table) {
> +        if (!sset_contains(&nb_nexthops_sset, sb_ecmp_nexthop->nexthop)) {
> +            sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> +        }
> +    }
> +
> +    sset_destroy(&nb_nexthops_sset);
> +    sset_destroy(&sb_nexthops_sset);
> +}
> +
>  /* Returns a string of the IP address of the router port 'op' that
>   * overlaps with 'ip_s".  If one is not found, returns NULL.
>   *
> @@ -11105,10 +11154,11 @@ parsed_routes_add(struct ovn_datapath *od, const 
> struct hmap *lr_ports,
>      }
>  
>      /* Verify that ip_prefix and nexthop are on the same network. */
> +    struct ovn_port *out_port = NULL;
>      if (!is_discard_route &&
>          !find_static_route_outport(od, lr_ports, route,
>                                     IN6_IS_ADDR_V4MAPPED(&prefix),
> -                                   NULL, NULL)) {
> +                                   NULL, &out_port)) {

We can now also remove the other invocation of
find_static_route_outport(), in build_ecmp_route_flow():

https://github.com/ovn-org/ovn/blob/d276728a8eaddbdc2944225457d22734837b3089/northd/northd.c#L11664-L11668

I think it's fine to just use route_->out_port there.  It will save us a
lookup.

>          return;
>      }
>  
> @@ -11151,6 +11201,7 @@ parsed_routes_add(struct ovn_datapath *od, const 
> struct hmap *lr_ports,
>      new_pr->hash = route_hash(new_pr);
>      new_pr->route = route;
>      new_pr->nbr = od->nbr;
> +    new_pr->out_port = out_port;
>      new_pr->ecmp_symmetric_reply = smap_get_bool(&route->options,
>                                                   "ecmp_symmetric_reply",
>                                                   false);
> diff --git a/northd/northd.h b/northd/northd.h
> index 8f76d642d..c4c2a5d7e 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -703,6 +703,7 @@ struct parsed_route {
>      uint32_t route_table_id;
>      uint32_t hash;
>      const struct nbrec_logical_router_static_route *route;
> +    struct ovn_port *out_port;
>      bool ecmp_symmetric_reply;
>      bool is_discard_route;
>      const struct nbrec_logical_router *nbr;
> @@ -745,6 +746,9 @@ void bfd_destroy(struct bfd_data *);
>  void bfd_sync_init(struct bfd_sync_data *);
>  void bfd_sync_destroy(struct bfd_sync_data *);
>  
> +void build_ecmp_nexthop_table(struct ovsdb_idl_txn *, struct hmap *,

IMO, raw hmap function arguments should have a name - it's not clear
what that map actually holds until we look at the implementation of the
function.

> +                              const struct sbrec_ecmp_nexthop_table *);
> +
>  struct lflow_table;
>  struct lr_stateful_tracked_data;
>  struct ls_stateful_tracked_data;
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 73abf2c8d..870ee256b 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
> -    "version": "20.37.0",
> -    "cksum": "1950136776 31493",
> +    "version": "20.38.0",
> +    "cksum": "3311058931 32061",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -610,6 +610,19 @@
>                                            "refTable": "Datapath_Binding"}}}},
>              "indexes": [["logical_port", "ip"]],
>              "isRoot": true},
> +        "ECMP_Nexthop": {
> +            "columns": {
> +                "nexthop": {"type": "string"},
> +                "port": {"type": "string"},

Is there a reason why this is not a reference to "Port_Binding" instead
of a plain string?

Also, we're missing "Datapath_Binding" as a column (what if two
independent routers have the same next-hop IP?).  However, if we add
Port_Binding as an explicit reference we can get the datapath
information from there.

> +                "mac": {"type": "string"},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["nexthop"]],

Ditto: this is not enough, multiple routers might use the same next hop.

> +            "isRoot": true},
>          "Chassis_Template_Var": {
>              "columns": {
>                  "chassis": {"type": "string"},
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 746cc6308..b5dc34012 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -5178,4 +5178,38 @@ tcp.flags = RST;
>        The set of variable values for a given chassis.
>      </column>
>    </table>
> +
> +  <table name="ECMP_Nexthop">
> +    <p>
> +      Each record in this table represents an active ECMP route committed by
> +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.
> +      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>
> +      to track active ct entries and to flush stale ones.
> +    </p>
> +    <column name="nexthop">
> +      <p>
> +        Nexthop IP address for this ECMP route. Nexthop IP address should
> +        be the IP address of a connected router port or the IP address of
> +        an external device used as nexthop for the given destination.
> +      </p>
> +    </column>
> +    <column name="port">
> +      <p>
> +        Logical port used to connect to the configured next-hop.
> +      </p>
> +    </column>
> +    <column name="mac">
> +      <p>
> +        Nexthop mac address.
> +      </p>
> +    </column>
> +
> +    <column name="options">
> +      Reserved for future use.
> +    </column>
> +

Let's add this table only if needed?  It's kind of confusing to have it
here.  DB schema column additions are backwards compatible so we don't
need to already reserve columns for the future.

> +    <column name="external_ids">
> +      See <em>External IDs</em> at the beginning of this document.
> +    </column>
> +  </table>
>  </database>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index dcc3dbbc3..9923243fa 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -3878,7 +3878,7 @@ wait_row_count bfd 1 logical_port=r0-sw3 detect_mult=5 
> dst_ip=192.168.3.2 \
>  check_engine_stats northd norecompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +#check_engine_stats northd_output norecompute compute

I guess this is by accident - it should now be changed to "recompute
compute" I guess.

>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3893,7 +3893,7 @@ wait_row_count bfd 1 logical_port=r0-sw1 min_rx=1000 
> min_tx=1000 detect_mult=100
>  check_engine_stats northd norecompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3905,7 +3905,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats static_routes recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3921,7 +3921,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats static_routes recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3933,7 +3933,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats route_policies recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3968,7 +3968,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats static_routes recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -3983,7 +3983,7 @@ check_engine_stats northd recompute nocompute
>  check_engine_stats bfd recompute nocompute
>  check_engine_stats route_policies recompute nocompute
>  check_engine_stats lflow recompute nocompute
> -check_engine_stats northd_output norecompute compute
> +check_engine_stats northd_output recompute nocompute
>  CHECK_NO_CHANGE_AFTER_RECOMPUTE
>  check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>  
> @@ -6800,6 +6800,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
>  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
>  
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> 192.168.0.10
> +check_row_count ECMP_Nexthop 1

I'd make this check more explicit and ensure that SB.ECMP_Nexthop fields
are also set correctly.

>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  
> @@ -6817,6 +6818,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
> ovn_strip_lflows], [0], [dn
>  ])
>  
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> 192.168.0.20
> +check_row_count ECMP_Nexthop 2
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
> @@ -6846,6 +6848,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | 
> ovn_strip_lflows], [0], [
>  
>  # add ecmp route with wrong nexthop
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 
> 192.168.1.20
> +check_row_count ECMP_Nexthop 2
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -w "lr_in_ip_routing" lr0flows | ovn_strip_lflows], [0], [dnl
> @@ -6865,6 +6868,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | 
> sed 's/192\.168\.0\..0/192.
>  
>  check ovn-nbctl lr-route-del lr0
>  wait_row_count nb:Logical_Router_Static_Route 0
> +check_row_count ECMP_Nexthop 0
>  
>  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows

Regards,
Dumitru

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to