On Fri, Feb 7, 2025 at 2:18 PM Lucas Vargas Dias via dev <
[email protected]> wrote:

> CMS like neutron uses NB Global nb_cfg to check liveness
> of compute nodes. It will increment nb_cfg of Chassis private
> and with monitor all enabled, all chassis exchange between them
> messages of update Chassis Private. So, CPU load of ovn-controller
> will be high in scenario with many chassis.
> To fix it, each chassis monitor its Chassis Private just only.
>
> Signed-off-by: Lucas Vargas Dias <[email protected]>
> ---
>

Hi Lucas,

thank you for the new version. There is one small thing down below which
doesn't require additonal version and can be adjusted during merge if
maintainers agree.

 controller/ovn-controller.c | 34 ++++++++++++++++++----------
>  tests/ovn-controller.at     | 45 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 67 insertions(+), 12 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 854e54854..cd58f3ffe 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -254,6 +254,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>       * avoid the unnecessarily extra wake-ups of ovn-controller. */
>      ovsdb_idl_condition_add_clause_true(&ldpg);
>
> +    if (chassis) {
> +        /* Monitors Chassis_Private record for current chassis only. */
> +        sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ,
> +                                              chassis->name);
> +    } else {
> +        /* During initialization, we monitor all records in
> Chassis_Private so
> +         * that we don't try to recreate existing ones. */
> +        ovsdb_idl_condition_add_clause_true(&chprv);
> +    }
> +
>      if (monitor_all) {
>          ovsdb_idl_condition_add_clause_true(&pb);
>          ovsdb_idl_condition_add_clause_true(&lf);
> @@ -264,7 +274,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>          ovsdb_idl_condition_add_clause_true(&ce);
>          ovsdb_idl_condition_add_clause_true(&ip_mcast);
>          ovsdb_idl_condition_add_clause_true(&igmp);
> -        ovsdb_idl_condition_add_clause_true(&chprv);
>          ovsdb_idl_condition_add_clause_true(&tv);
>          goto out;
>      }
> @@ -301,16 +310,9 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>          sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ,
>                                              &chassis->header_.uuid);
>
> -        /* Monitors Chassis_Private record for current chassis only. */
> -        sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ,
> -                                              chassis->name);
> -
>          sbrec_chassis_template_var_add_clause_chassis(&tv, OVSDB_F_EQ,
>                                                        chassis->name);
>      } else {
> -        /* During initialization, we monitor all records in
> Chassis_Private so
> -         * that we don't try to recreate existing ones. */
> -        ovsdb_idl_condition_add_clause_true(&chprv);
>          /* Also, to avoid traffic disruption (e.g., conntrack flushing for
>           * zones that are used by OVN but not yet known due to the SB
> initial
>           * contents not being available), monitor all port bindings
> @@ -603,7 +605,8 @@ static void
>  update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl,
>               bool *monitor_all_p, bool *reset_ovnsb_idl_min_index,
>               struct controller_engine_ctx *ctx,
> -             unsigned int *sb_cond_seqno)
> +             unsigned int *sb_cond_seqno,
> +             struct ovsdb_idl_index *sbrec_chassis_by_name)
>  {
>      const struct ovsrec_open_vswitch *cfg =
> ovsrec_open_vswitch_first(ovs_idl);
>      if (!cfg) {
> @@ -629,12 +632,18 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct
> ovsdb_idl *ovnsb_idl,
>          get_chassis_external_id_value_bool(
>              &cfg->external_ids, chassis_id, "ovn-monitor-all", false);
>      if (monitor_all) {
> +        const struct sbrec_chassis *chassis = NULL;
> +        if (chassis_id && sbrec_chassis_by_name) {
> +            chassis =
> +                chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> +        }
> +
>          /* Always call update_sb_monitors when monitor_all is true.
>           * Otherwise, don't call it here, because there would be
> unnecessary
>           * extra cost. Instead, it is called after the engine execution
> only
>           * when it is necessary. */
>          unsigned int next_cond_seqno =
> -            update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, NULL, true);
> +            update_sb_monitors(ovnsb_idl, chassis, NULL, NULL, NULL,
> true);
>          if (sb_cond_seqno) {
>              *sb_cond_seqno = next_cond_seqno;
>          }
> @@ -5504,7 +5513,8 @@ main(int argc, char *argv[])
>
>          update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl,
> &sb_monitor_all,
>                       &reset_ovnsb_idl_min_index,
> -                     &ctrl_engine_ctx, &ovnsb_expected_cond_seqno);
> +                     &ctrl_engine_ctx, &ovnsb_expected_cond_seqno,
> +                     sbrec_chassis_by_name);
>          update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl));
>
>          struct ovsdb_idl_txn *ovnsb_idl_txn
> @@ -5974,7 +5984,7 @@ loop_done:
>          bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl);
>          while (!done) {
>              update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl,
> -                         NULL, NULL, NULL, NULL);
> +                         NULL, NULL, NULL, NULL, NULL);
>              update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl));
>
>              struct ovsdb_idl_txn *ovs_idl_txn
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 988de7bb3..9c859374a 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -484,6 +484,51 @@ OVN_CLEANUP([hv])
>  AT_CLEANUP
>  ])
>
> +# Checks that ovn-controller increments the nb_cfg value in the
> Chassis_Private table
> +# and each hv doesn't exchange messages of update of Chassis_Private table
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - Bump Chassis_Private nb_cfg value with Monitor
> All])
> +AT_KEYWORDS([ovn])
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +
> +as hv1 ovs-vsctl set open . external_ids:ovn-monitor-all=true
> +as hv2 ovs-vsctl set open . external_ids:ovn-monitor-all=true
> +
> +# Wait until ovn-monitor-all is processed by ovn-controller.
> +wait_row_count Chassis 1 name=hv1 other_config:ovn-monitor-all=true
> +wait_row_count Chassis 1 name=hv2 other_config:ovn-monitor-all=true
> +
> +# enable debug for check the received messages
> +as hv1 ovn-appctl -t ovn-controller vlog/set dbg
> +as hv2 ovn-appctl -t ovn-controller vlog/set dbg
> +
> +# Bump the NB_Global nb_cfg value
> +nb_global_id=$(ovn-nbctl --columns _uuid --bare find nb_global)
> +check ovn-nbctl set NB_Global ${nb_global_id} nb_cfg=999
> +
> +# ovn-controller should bump the nb_cfg in the chassis_private table
> +OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find
> chassis_private name=hv1`])
> +OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find
> chassis_private name=hv2`])
> +
> +AT_CHECK([test 1 = `cat hv1/ovn-controller.log | grep
> 'Chassis_Private.*nb_cfg":999' | wc -l`])
> +AT_CHECK([test 1 = `cat hv2/ovn-controller.log | grep
> 'Chassis_Private.*nb_cfg":999' | wc -l`])
>

nit: Both of those could be shortened by just using "grep -c" instead.


> +
> +OVN_CLEANUP([hv1],[hv2])
> +AT_CLEANUP
> +])
> +
>  # check that nb_cfg overflow cases handled properly
>  AT_SETUP([ovn-controller - overflow the nb_cfg value across the tables])
>  AT_KEYWORDS([ovn])
> --
> 2.34.1
>
>
> --
>
>
>
>
> _'Esta mensagem é direcionada apenas para os endereços constantes no
> cabeçalho inicial. Se você não está listado nos endereços constantes no
> cabeçalho, pedimos-lhe que desconsidere completamente o conteúdo dessa
> mensagem e cuja cópia, encaminhamento e/ou execução das ações citadas
> estão
> imediatamente anuladas e proibidas'._
>
>
> * **'Apesar do Magazine Luiza tomar
> todas as precauções razoáveis para assegurar que nenhum vírus esteja
> presente nesse e-mail, a empresa não poderá aceitar a responsabilidade por
> quaisquer perdas ou danos causados por esse e-mail ou por seus anexos'.*
>
>
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Otherwise it looks good.

Acked-by: Ales Musil <[email protected]>

Thanks,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to