On Tue, Aug 27, 2024 at 12:18 PM Max Lamprecht via dev
<[email protected]> wrote:
>
> This adds a new engine node "bfd_chassis" which handles changes
> on the Sb_ha_chassis_group table.
>
> This improves heavily performance at scale.
>
> Without that patch we are permanently reconciling bfd_run and
> the ovn-controller is capped at 100% cpu usage. Especially wqon
> gateway chassis.
>
> 3000 HA_Chassis_Group (many ref_chassis)
> 1500 Chassis
>
> performance trace:
> - 98.86% main
> - 89.55% bfd_run
> - 89.23% bfd_calculate_chassis
> - 28.07% sset_add__
> - 27.86% sset_add
> - 18.20% smap_get_bool
> - 13.58% sset_destroy
>
> Signed-off-by: Max Lamprecht <[email protected]>
> Co-authored-by: Ihtisham ul Haq <[email protected]>
> Signed-off-by: Ihtisham ul Haq <[email protected]>
> Co-authored-by: Maxim Korezkij <[email protected]>
> Signed-off-by: Maxim Korezkij <[email protected]>
> Co-authored-by: Felix Huettner <[email protected]>
> Signed-off-by: Felix Huettner <[email protected]>
Thanks for the patch and sorry for late reviews.
The patch makes sense to me. I added the authors of this patch to the
AUTHORS.rst file
and applied it to the main.
Numan
> ---
> controller/bfd.c | 14 +++----
> controller/bfd.h | 7 +++-
> controller/ovn-controller.c | 79 +++++++++++++++++++++++++++++++++----
> 3 files changed, 84 insertions(+), 16 deletions(-)
>
> diff --git a/controller/bfd.c b/controller/bfd.c
> index 22a8c6695..9889b7e7a 100644
> --- a/controller/bfd.c
> +++ b/controller/bfd.c
> @@ -117,7 +117,7 @@ bfd_calculate_active_tunnels(const struct ovsrec_bridge
> *br_int,
> *
> * If 'our_chassis' is C5 then this function returns empty bfd set.
> */
> -static void
> +void
> bfd_calculate_chassis(
> const struct sbrec_chassis *our_chassis,
> const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
> @@ -154,7 +154,9 @@ bfd_calculate_chassis(
> if (smap_get_bool(&ref_ch->other_config, "is-remote",
> false)) {
> continue;
> }
> - sset_add(&grp_chassis, ref_ch->name);
> + /* we have bfd_setup_required == true anyway, so we skip
> adding
> + * it to an sset that we later move to another sset again. */
> + sset_add(bfd_chassis, ref_ch->name);
> }
> } else {
> /* This is not an HA chassis. Check if this chassis is present
> @@ -181,16 +183,13 @@ bfd_calculate_chassis(
> void
> bfd_run(const struct ovsrec_interface_table *interface_table,
> const struct ovsrec_bridge *br_int,
> + const struct sset *bfd_chassis,
> const struct sbrec_chassis *chassis_rec,
> - const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table,
> const struct sbrec_sb_global_table *sb_global_table)
> {
> if (!chassis_rec) {
> return;
> }
> - struct sset bfd_chassis = SSET_INITIALIZER(&bfd_chassis);
> - bfd_calculate_chassis(chassis_rec, ha_chassis_grp_table,
> - &bfd_chassis);
>
> /* Identify tunnels ports(connected to remote chassis id) to enable bfd
> */
> struct sset tunnels = SSET_INITIALIZER(&tunnels);
> @@ -205,7 +204,7 @@ bfd_run(const struct ovsrec_interface_table
> *interface_table,
> sset_add(&tunnels, port_name);
>
> if (encaps_tunnel_id_parse(tunnel_id, &chassis_name, NULL,
> NULL)) {
> - if (sset_contains(&bfd_chassis, chassis_name)) {
> + if (sset_contains(bfd_chassis, chassis_name)) {
> sset_add(&bfd_ifaces, port_name);
> }
> free(chassis_name);
> @@ -270,5 +269,4 @@ bfd_run(const struct ovsrec_interface_table
> *interface_table,
> smap_destroy(&bfd);
> sset_destroy(&tunnels);
> sset_destroy(&bfd_ifaces);
> - sset_destroy(&bfd_chassis);
> }
> diff --git a/controller/bfd.h b/controller/bfd.h
> index 17fab5323..be0d599b7 100644
> --- a/controller/bfd.h
> +++ b/controller/bfd.h
> @@ -31,10 +31,15 @@ void bfd_register_ovs_idl(struct ovsdb_idl *);
>
> void bfd_run(const struct ovsrec_interface_table *,
> const struct ovsrec_bridge *,
> + const struct sset *,
> const struct sbrec_chassis *,
> - const struct sbrec_ha_chassis_group_table *,
> const struct sbrec_sb_global_table *);
>
> +void bfd_calculate_chassis(
> + const struct sbrec_chassis *,
> + const struct sbrec_ha_chassis_group_table *,
> + struct sset *);
> +
> void bfd_calculate_active_tunnels(const struct ovsrec_bridge *br_int,
> struct sset *active_tunnels);
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 854d80bdf..bff73a6e7 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -841,6 +841,7 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
> #define SB_NODES \
> SB_NODE(sb_global, "sb_global") \
> SB_NODE(chassis, "chassis") \
> + SB_NODE(ha_chassis_group, "ha_chassis_group") \
> SB_NODE(encap, "encap") \
> SB_NODE(address_set, "address_set") \
> SB_NODE(port_group, "port_group") \
> @@ -3290,6 +3291,47 @@ en_mac_cache_cleanup(void *data)
> hmap_destroy(&cache_data->fdbs);
> }
>
> +struct ed_type_bfd_chassis {
> + struct sset bfd_chassis;
> +};
> +
> +static void *
> +en_bfd_chassis_init(struct engine_node *node OVS_UNUSED,
> + struct engine_arg *arg OVS_UNUSED)
> +{
> + struct ed_type_bfd_chassis *data = xzalloc(sizeof *data);
> + sset_init(&data->bfd_chassis);
> + return data;
> +}
> +
> +static void
> +en_bfd_chassis_run(struct engine_node *node, void *data OVS_UNUSED)
> +{
> + struct ed_type_bfd_chassis *bfd_chassis = data;
> + const struct ovsrec_open_vswitch_table *ovs_table =
> + EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
> + const char *chassis_id = get_ovs_chassis_id(ovs_table);
> + const struct sbrec_ha_chassis_group_table *ha_chassis_grp_table =
> + EN_OVSDB_GET(engine_get_input("SB_ha_chassis_group", node));
> + struct ovsdb_idl_index *sbrec_chassis_by_name =
> + engine_ovsdb_node_get_index(
> + engine_get_input("SB_chassis", node),
> + "name");
> + const struct sbrec_chassis *chassis
> + = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> +
> + sset_clear(&bfd_chassis->bfd_chassis);
> + bfd_calculate_chassis(chassis, ha_chassis_grp_table,
> + &bfd_chassis->bfd_chassis);
> + engine_set_node_state(node, EN_UPDATED);
> +}
> +
> +static void
> +en_bfd_chassis_cleanup(void *data OVS_UNUSED){
> + struct ed_type_bfd_chassis *bfd_chassis = data;
> + sset_destroy(&bfd_chassis->bfd_chassis);
> +}
> +
> /* Engine node which is used to handle the Non VIF data like
> * - OVS patch ports
> * - Tunnel ports and the related chassis information.
> @@ -4716,6 +4758,14 @@ controller_output_mac_cache_handler(struct engine_node
> *node,
> return true;
> }
>
> +static bool
> +controller_output_bfd_chassis_handler(struct engine_node *node,
> + void *data OVS_UNUSED)
> +{
> + engine_set_node_state(node, EN_UPDATED);
> + return true;
> +}
> +
> /* Handles sbrec_chassis changes.
> * If a new chassis is added or removed return false, so that
> * flows are recomputed. For any updates, there is no need for
> @@ -5025,6 +5075,7 @@ main(int argc, char *argv[])
> ENGINE_NODE(if_status_mgr, "if_status_mgr");
> ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data");
> ENGINE_NODE(mac_cache, "mac_cache");
> + ENGINE_NODE(bfd_chassis, "bfd_chassis");
>
> #define SB_NODE(NAME, NAME_STR) ENGINE_NODE_SB(NAME, NAME_STR);
> SB_NODES
> @@ -5064,6 +5115,10 @@ main(int argc, char *argv[])
>
> engine_add_input(&en_if_status_mgr, &en_ovs_interface,
> if_status_mgr_ovs_interface_handler);
> + engine_add_input(&en_bfd_chassis, &en_ovs_open_vswitch, NULL);
> + engine_add_input(&en_bfd_chassis, &en_sb_sb_global, NULL);
> + engine_add_input(&en_bfd_chassis, &en_sb_chassis, NULL);
> + engine_add_input(&en_bfd_chassis, &en_sb_ha_chassis_group, NULL);
>
> /* Note: The order of inputs is important, all OVS interface changes must
> * be handled before any ct_zone changes.
> @@ -5221,6 +5276,8 @@ main(int argc, char *argv[])
> controller_output_pflow_output_handler);
> engine_add_input(&en_controller_output, &en_mac_cache,
> controller_output_mac_cache_handler);
> + engine_add_input(&en_controller_output, &en_bfd_chassis,
> + controller_output_bfd_chassis_handler);
>
> struct engine_arg engine_arg = {
> .sb_idl = ovnsb_idl_loop.idl,
> @@ -5263,6 +5320,8 @@ main(int argc, char *argv[])
> engine_get_internal_data(&en_pflow_output);
> struct ed_type_ct_zones *ct_zones_data =
> engine_get_internal_data(&en_ct_zones);
> + struct ed_type_bfd_chassis *bfd_chassis_data =
> + engine_get_internal_data(&en_bfd_chassis);
> struct ed_type_runtime_data *runtime_data =
> engine_get_internal_data(&en_runtime_data);
> struct ed_type_template_vars *template_vars_data =
> @@ -5571,6 +5630,7 @@ main(int argc, char *argv[])
> }
>
> ct_zones_data = engine_get_data(&en_ct_zones);
> + bfd_chassis_data = engine_get_data(&en_bfd_chassis);
> if (ovs_idl_txn) {
> if (ct_zones_data) {
> stopwatch_start(CT_ZONE_COMMIT_STOPWATCH_NAME,
> @@ -5580,13 +5640,18 @@ main(int argc, char *argv[])
> stopwatch_stop(CT_ZONE_COMMIT_STOPWATCH_NAME,
> time_msec());
> }
> - stopwatch_start(BFD_RUN_STOPWATCH_NAME, time_msec());
> - bfd_run(ovsrec_interface_table_get(ovs_idl_loop.idl),
> - br_int, chassis,
> - sbrec_ha_chassis_group_table_get(
> - ovnsb_idl_loop.idl),
> -
> sbrec_sb_global_table_get(ovnsb_idl_loop.idl));
> - stopwatch_stop(BFD_RUN_STOPWATCH_NAME, time_msec());
> + if (bfd_chassis_data) {
> + stopwatch_start(
> + BFD_RUN_STOPWATCH_NAME, time_msec());
> + bfd_run(
> + ovsrec_interface_table_get(ovs_idl_loop.idl),
> + br_int, &bfd_chassis_data->bfd_chassis,
> + chassis, sbrec_sb_global_table_get(
> + ovnsb_idl_loop.idl)
> + );
> + stopwatch_stop(
> + BFD_RUN_STOPWATCH_NAME, time_msec());
> + }
> }
>
> runtime_data = engine_get_data(&en_runtime_data);
> --
> 2.34.1
>
>
>
> Diese E Mail enthält möglicherweise vertrauliche Inhalte und ist nur für die
> Verwertung durch den vorgesehenen Empfänger bestimmt.
> Sollten Sie nicht der vorgesehene Empfänger sein, setzen Sie den Absender
> bitte unverzüglich in Kenntnis und löschen diese E Mail.
>
> Hinweise zum Datenschutz finden Sie
> hier
> .
>
> This e-mail may contain confidential content and is intended only for the
> specified recipient/s.
> If you are not the intended recipient, please inform the sender immediately
> and delete this e-mail.
>
> Information on data protection can be found here.
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev