On 12/20/24 7:34 PM, Lorenzo Bianconi wrote:
>> To improve ovn-northd performance when processing changes to IGMP
>> groups this patch implements a handler to process IGMP group changes
>> instead of relying on a full database recompute.
>>
>> I tested using a script that is available here:
>> https://gist.github.com/JacobTanenbaum/da70c75178bec2b8e1cf5487c74466e3
>>
>> when running this script inside the OVN sandbox it creates sw0 and 5000
>> ports on sw0. It then adds one IGMP group with 1 port in it and waits
>> for the databases to sync
>>
>> using upstream main
>>
>> Time spent on processing nb_cfg 2:
>>      ovn-northd delay before processing:     5ms
>>      ovn-northd completion:                  5ms
>>
>> this is a typical output after adding the IGMP group waiting for the Sb
>> to sync
>>
>> while using a branch with this patch:
>>
>> Time spent on processing nb_cfg 2:
>>      ovn-northd delay before processing:     1ms
>>      ovn-northd completion:                  1ms
>>
>> this is the time that it takes to wait for the database to sync.
>>
>> Additionally the time it takes to add an IGMP group increases the more
>> ports are on a switch from the current head of main while on a branch
>> with this patch the time to add an IGMP group remains the same
>> regardless of the size of the database (up to the 100000 ports on the
>> logical switch that I have tested).
> 
> Hi Jacob,

Hi Jacob, Lorenzo, Ales,

> 
> nice patch :) It requires a trivial reabse and just few nit inline.
> Other than that:
> 
> Acked-by: Lorenzo Bianconi <[email protected]>
> 
>>
>> Signed-off-by: Jacob Tanenbaum <[email protected]>
>> Suggested-by: Dumitru Ceara <[email protected]>
>> Reported-at: https://issues.redhat.com/browse/FDP-756
>>
>> ---
>> v4: split into two patches
>>     removed checking if uuid in uuidset
>>     added more specific checks to testing
>>     changed else if to else for updates to igmp_groups
>>     separated the deletion code into it's own function
>> v3: Removed incorrect call to ovn_igmp_group_destroy()
>> v2: Rebase on top of latest main.
>>     Fix memory leaks detected by CI.
>>     Capitalize first letter of the subject line
>>     Fix typos in commit message
>>
>> diff --git a/northd/en-lflow.c b/northd/en-lflow.c
>> index fa1f0236d..6c3e20695 100644
>> --- a/northd/en-lflow.c
>> +++ b/northd/en-lflow.c
>> @@ -233,3 +233,20 @@ void en_lflow_cleanup(void *data_)
>>      struct lflow_data *data = data_;
>>      lflow_table_destroy(data->lflow_table);
>>  }
>> +
> 
> [...]
> 
>>  
>> +/* handle changes to the sbrec_igmp_group_table in the incremental
>> + * processor
>> + */
>> +bool
>> +handle_igmp_change(struct ovsdb_idl_txn *ovnsb_txn,
>> +                   const struct sbrec_igmp_group_table 
>> *sbrec_igmp_group_table,
>> +                   struct lflow_input *input_data,
>> +                   struct lflow_table *lflows)
>> +{
>> +    struct hmap igmp_groups = HMAP_INITIALIZER(&igmp_groups);
>> +    struct hmap mcast_groups = HMAP_INITIALIZER(&mcast_groups);
>> +    struct ds match = DS_EMPTY_INITIALIZER;
>> +    struct ds actions = DS_EMPTY_INITIALIZER;
>> +
>> +    struct uuidset datapaths_to_sync =
>> +        UUIDSET_INITIALIZER(&datapaths_to_sync);
>> +
>> +    const struct sbrec_igmp_group *sb_igmp;
>> +    SBREC_IGMP_GROUP_TABLE_FOR_EACH_TRACKED (sb_igmp, 
>> sbrec_igmp_group_table) {
>> +        struct ovn_datapath *od =
>> +            ovn_datapath_from_sbrec(&input_data->ls_datapaths->datapaths,
>> +                                    &input_data->lr_datapaths->datapaths,
>> +                                    sb_igmp->datapath);
> 
> Theoretically ovn_datapath_from_sbrec() can return a NULL pointer. Is it safe
> here to assume od is always not NULL?
> 

Nice catch!  If the datapath is deleted in the same transaction as the
IGMP group, ovn_datapath_from_sbrec() will return NULL here.

Would this work?

--- a/northd/northd.c
+++ b/northd/northd.c
@@ -19498,6 +19498,13 @@ handle_igmp_change(struct ovsdb_idl_txn *ovnsb_txn,
             ovn_datapath_from_sbrec(&input_data->ls_datapaths->datapaths,
                                     &input_data->lr_datapaths->datapaths,
                                     sb_igmp->datapath);
+        if (!od) {
+            /* Deletion of the datapath can happen at the same time as
+             * the deletion of the IGMP_Group.  Logical switch/datapath
+             * deletion triggers a recompute of the en_lflow node, so
+             * skip any IGMP changes for those datapaths. */
+            continue;
+        }
         /* Processing new IGMP group. */
         if (sbrec_igmp_group_is_new(sb_igmp)) {

>> +        /* Processing new IGMP group. */
>> +        if (sbrec_igmp_group_is_new(sb_igmp)) {
>> +

Nit: no empty line needed IMO.

>> +            build_igmp_group_for_sb(sb_igmp,
>> +                                    od,
>> +                                    
>> input_data->sbrec_mcast_group_by_name_dp,
>> +                                    input_data->ls_ports, &igmp_groups);
>> +
>> +

Nit: no empty lines needed IMO.

>> +            build_igmp_router_entries(od,
>> +                                      
>> input_data->sbrec_mcast_group_by_name_dp,
>> +                                      &igmp_groups);
>> +
>> +        /* Processing a deleted IGMP group. */
>> +        } else if (sbrec_igmp_group_is_deleted(sb_igmp)) {
>> +            delete_igmp_group(sb_igmp,
>> +                              od,
>> +                              input_data->sbrec_mcast_group_by_name_dp,
>> +                              lflows,
>> +                              &datapaths_to_sync,
>> +                              &actions,
>> +                              &match);
>> +        /* Processing an updated IGMP group. */
>> +        } else {
>> +            build_igmp_group_for_sb(sb_igmp,
>> +                                    od,
>> +                                    
>> input_data->sbrec_mcast_group_by_name_dp,
>> +                                    input_data->ls_ports, &igmp_groups);
>> +
>> +            /* Do not need to worry about building IGMP groups for multicast
>> +             * routers with relay enabled. Updating the IGMP group does not
>> +             * make any changes applicable to them.
>> +             */
>> +        }
>> +    }
>> +

Nit: too many empty lines.

>> +
>> +    struct ovn_igmp_group *igmp_group;
>> +    HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, &igmp_groups) {
>> +        if (igmp_group->datapath->nbr) {
>> +            /* For every router datapath each port of the peer switches
>> +             * need to be checked if they flood multicast.
>> +             */
>> +            process_peer_switches_for_igmp(igmp_group,
>> +                                           &datapaths_to_sync,
>> +                                           &mcast_groups,
>> +                                           lflows,
>> +                                           &actions,
>> +                                           input_data->meter_groups);
>> +        }
>> +        build_mcast_group_from_igmp_group(igmp_group,
>> +                                          &mcast_groups,
>> +                                          &igmp_groups);
>> +    }
>> +
>> +    /* After all the routers have been processed the flows for the
>> +     * the switches can be created */
>> +    HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, &igmp_groups) {
>> +        build_lswitch_ip_mcast_igmp_mld(igmp_group, lflows, &actions, 
>> &match);
>> +
>> +        uuidset_insert(&datapaths_to_sync, &igmp_group->datapath->key);
>> +    }
>> +
>> +    struct ovn_multicast *mc;
>> +    HMAP_FOR_EACH_SAFE (mc, hmap_node, &mcast_groups) {
>> +        const struct sbrec_multicast_group *sbmc =
>> +            mcast_group_lookup(input_data->sbrec_mcast_group_by_name_dp,
>> +                                mc->group->name,  mc->datapath->sb);
>> +        if (sbmc) {
>> +            for (size_t i = 0; i < mc->n_ports; i++) {
>> +                sbrec_multicast_group_update_ports_addvalue(sbmc,
>> +                                                            
>> mc->ports[i]->sb);
>> +            }
>> +        } else {
>> +            sbmc = create_sb_multicast_group(ovnsb_txn, mc->datapath->sb,
>> +                                             mc->group->name, 
>> mc->group->key);
>> +            ovn_multicast_update_sbrec(mc, sbmc);
>> +        }
>> +        if (mc->datapath->nbr) {
>> +            build_mcast_lookup_flows_for_lrouter(mc->datapath,
>> +                                                 lflows,
>> +                                                 &match,
>> +                                                 &actions,
>> +                                                 
>> mc->datapath->igmp_lflow_ref);
>> +
>> +            uuidset_insert(&datapaths_to_sync, &mc->datapath->key);
>> +        }
>> +        ovn_multicast_destroy(&mcast_groups, mc);
>> +    }
>> +    struct uuidset_node *uuidnode;
>> +    UUIDSET_FOR_EACH (uuidnode, &datapaths_to_sync) {
>> +        const struct ovn_datapath *dp =
>> +            ovn_datapath_find(&input_data->ls_datapaths->datapaths,
>> +                              &uuidnode->uuid);
>> +        if (!dp) {
>> +            dp = ovn_datapath_find(&input_data->lr_datapaths->datapaths,
>> +                                   &uuidnode->uuid);
>> +        }
> 
> same here, is dp always not NULL?
> 

With the incremental diff above, this can't happen anymore.

>> +
>> +        lflow_ref_sync_lflows(dp->igmp_lflow_ref,
>> +            lflows, ovnsb_txn,
>> +            input_data->ls_datapaths,
>> +            input_data->lr_datapaths,
>> +            input_data->ovn_internal_version_changed,
>> +            input_data->sbrec_logical_flow_table,
>> +            input_data->sbrec_logical_dp_group_table);
>> +    }
>> +
>> +    ds_destroy(&match);
>> +    ds_destroy(&actions);
>> +
>> +    /* It is required to cleanup igmp_groups after multicast_groups */
>> +    HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, &igmp_groups) {
>> +        ovn_igmp_group_destroy(&igmp_groups, igmp_group);
>> +    }
>> +    hmap_destroy(&igmp_groups);
>> +    hmap_destroy(&mcast_groups);
>> +    uuidset_destroy(&datapaths_to_sync);
>> +
>> +    return true;
>> +}
>> +
>> +/* Process the deletion of an IGMP group. We only need to remove the
>> + * multicast_groups associated with the datapath the IGMP group is on and 
>> the
>> + */
>> +static void
>> +delete_igmp_group(const struct sbrec_igmp_group *sb_igmp,
>> +                  struct ovn_datapath *od,
>> +                  struct ovsdb_idl_index *sbrec_mcast_group_by_name_dp,
>> +                  struct lflow_table *lflows,
>> +                  struct uuidset *datapaths_to_sync,
>> +                  struct ds *actions,
>> +                  struct ds *match)
>> +{
>> +    /* The igmp group for multicast routers does not have a
>> +     * multicast group, skip the deletion
>> +     */
>> +     if (!strcmp(sb_igmp->address, OVN_IGMP_GROUP_MROUTERS)) {
>> +        return;
>> +     }
>> +
>> +     for (size_t i = 0; i < od->n_router_ports; i++) {
>> +        struct ovn_port *port = od->router_ports[i];
>> +        lflow_ref_unlink_lflows(port->peer->od->igmp_lflow_ref);
>> +        build_mcast_lookup_flows_for_lrouter(od->router_ports[i]->peer->od,
>> +                                             lflows,
>> +                                             match,
>> +                                             actions,
>> +                                             
>> port->peer->od->igmp_lflow_ref);

This part is not correct in my opinion.

If we happen to be deleting two IGMP records for the same datapath then
we'll call delete_igmp_group() twice (once for each record).  The second
call will unlink any flows the first call might have added.

The same comment applies to process_peer_switches_for_igmp().  I guess
the actual problem is that we can't have only a lflow-ref per datapath.
Instead we should probably also have a lflow_ref per IGMP group.

If an IGMP group is updated what we want is to:
- delete logical flows corresponding to _that IGMP group_ without
  touching others
- add logical flows to implement the functionality for the new IGMP
  group contents

>> +
>> +        uuidset_insert(datapaths_to_sync, 
>> &od->router_ports[i]->peer->od->key);
>> +        const struct sbrec_multicast_group *sbmc =
>> +            mcast_group_lookup(sbrec_mcast_group_by_name_dp,
>> +                               sb_igmp->address,  port->peer->od->sb);
>> +        if (sbmc) {
>> +            sbrec_multicast_group_delete(sbmc);
>> +        }
>> +    }
>> +    const struct sbrec_multicast_group *sbmc =
>> +        mcast_group_lookup(sbrec_mcast_group_by_name_dp,
>> +                           sb_igmp->address,  sb_igmp->datapath);
>> +        int removedPorts = 0;
> 
> indentation has something weird here. Moreover, please avoid CamelCase.
> 

I fixed it up and changed the variable name to 'removed_ports'.

>> +        for (size_t i = 0; i <sb_igmp->n_ports; i++) {
>> +
>> +            if (smap_get_bool(&sb_igmp->ports[i]->options,
>> +                              "mcast_flood",
>> +                              false)) {
>> +                /* If the port has mcast_flood set it is not in
>> +                 * the multicast_group for this igmp_group
>> +                 */
>> +                continue;
>> +            }
>> +            sbrec_multicast_group_update_ports_delvalue(sbmc,
>> +                                                        sb_igmp->ports[i]);
>> +            removedPorts++;
> 
> same here.
> 

Here too.

> Regards,
> Lorenzo
> 
>> +        }
>> +        if (sb_igmp->n_ports >= sbmc->n_ports) {
>> +            lflow_ref_unlink_lflows(od->igmp_lflow_ref);
>> +            uuidset_insert(datapaths_to_sync, &od->key);
>> +        }
>> +        /* if the deletion removed all the ports from the
>> +         * multicast_group delete it from the database
>> +         */
>> +        if (removedPorts == sbmc->n_ports) {
>> +            sbrec_multicast_group_delete(sbmc);
>> +        }
>> +}
>> +
>>  static void
>>  build_igmp_group_for_sb(const struct sbrec_igmp_group *sb_igmp,
>>                          struct ovn_datapath *od,
>> @@ -19452,3 +19682,42 @@ build_mcast_group_from_igmp_group(struct 
>> ovn_igmp_group *igmp_group,
>>       */
>>      ovn_igmp_group_aggregate_ports(igmp_group, mcast_groups);
>>  }
>> +
>> +/* For a given igmp_group check all the peer switches on the igmp_groups
>> + * datapath for ports with mcast_info.flood set, If a port does
>> + * mcast_info.flood set add it to the MC_STATIC group and prepare to update 
>> the
>> + * datapaths logical flows.
>> + */
>> +static void
>> +process_peer_switches_for_igmp(struct ovn_igmp_group *igmp_group,
>> +                               struct uuidset *datapaths_to_sync,
>> +                               struct hmap *mcast_groups,
>> +                               struct lflow_table *lflows,
>> +                               struct ds *actions,
>> +                               const struct shash *meter_groups)
>> +{
>> +
>> +    for (size_t i = 0; i < igmp_group->datapath->n_ls_peers; i++) {
>> +        struct ovn_datapath *ls_peer = igmp_group->datapath->ls_peers[i];
>> +        struct ovn_port *op;
>> +
>> +        HMAP_FOR_EACH_SAFE (op, dp_node, &ls_peer->ports) {
>> +            /* If this port is configured to always flood multicast traffic
>> +             * add it to the MC_STATIC group.
>> +             */
>> +            if (op->mcast_info.flood) {
>> +                ovn_multicast_add(mcast_groups, &mc_static, op);
>> +                op->od->mcast_info.sw.flood_static = true;
>> +
>> +                lflow_ref_unlink_lflows(op->od->igmp_lflow_ref);
>> +                build_lswitch_destination_lookup_bmcast(op->od,
>> +                                                        lflows,
>> +                                                        actions,
>> +                                                        meter_groups,
>> +                                                        NULL);
>> +
>> +                uuidset_insert(datapaths_to_sync, &op->od->key);
>> +            }
>> +        }
>> +    }
>> +}
>> diff --git a/northd/northd.h b/northd/northd.h
>> index d60c944df..7987105ac 100644
>> --- a/northd/northd.h
>> +++ b/northd/northd.h
>> @@ -375,6 +375,7 @@ struct ovn_datapath {
>>      /* The logical router group to which this datapath belongs.
>>       * Valid only if it is logical router datapath. NULL otherwise. */
>>      struct lrouter_group *lr_group;
>> +    struct lflow_ref *igmp_lflow_ref;

As described above, I think we also need one lflow_ref per group.
Otherwise we clear datapath specific IGMP lflows whenever an IGMP
group is updated/deleted.

>>  
>>      /* Map of ovn_port objects belonging to this datapath.
>>       * This map doesn't include derived ports. */
>> @@ -862,4 +863,10 @@ is_vxlan_mode(const struct smap *nb_options,
>>  
>>  uint32_t get_ovn_max_dp_key_local(bool _vxlan_mode);
>>  
>> +bool
>> +handle_igmp_change(struct ovsdb_idl_txn *ovnsb_txn,
>> +                   const struct sbrec_igmp_group_table 
>> *sbrec_igmp_group_table,
>> +                   struct lflow_input *input_data,
>> +                   struct lflow_table *lflows);
>> +
>>  #endif /* NORTHD_H */
>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>> index 4eae1c67c..06385f515 100644
>> --- a/tests/ovn-northd.at
>> +++ b/tests/ovn-northd.at
>> @@ -14,6 +14,7 @@ m4_define([_DUMP_DB_TABLES], [
>>      ovn-sbctl list port_group >> $1
>>      ovn-sbctl list bfd >> $1
>>      ovn-sbctl dump-flows > lflows_$1
>> +    ovn-sbctl list multicast_group > mgroup_$1
>>  ])
>>  
>>  # CHECK_NO_CHANGE_AFTER_RECOMPUTE
>> @@ -10949,6 +10950,92 @@ OVN_CLEANUP([hv1])
>>  AT_CLEANUP
>>  ])
>>  
>> +OVN_FOR_EACH_NORTHD_NO_HV([
>> +AT_SETUP([IGMP incremental processing])
>> +
>> +check_recompute_counter() {
>> +    lflow_recomp=$(as northd ovn-appctl -t ovn-northd inc-engine/show-stats 
>> lflow recompute)
>> +    AT_CHECK([test x$lflow_recomp = x$1])
>> +}
>> +ovn_start
>> +
>> +net_add n1
>> +sim_add hv1
>> +as hv1
>> +
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.11
>> +
>> +sim_add hv2
>> +as hv2
>> +
>> +check ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.2
>> +
>> +check ovn-nbctl ls-add sw1
>> +check ovn-nbctl ls-add sw2
>> +
>> +check ovn-nbctl lsp-add sw1 sw1-p11
>> +check ovn-nbctl lsp-add sw2 sw2-p21
>> +#check ovn-nbctl set Logical_Switch_Port sw2-p21 options:mcast_flood='true'
>> +
>> +check ovn-nbctl lr-add rtr
>> +check ovn-nbctl lrp-add rtr rtr-sw1 00:00:00:00:01:00 10.0.0.254/24
>> +check ovn-nbctl lrp-add rtr rtr-sw2 00:00:00:00:02:00 10.0.0.254/24
>> +
>> +
>> +check ovn-nbctl lsp-add sw1 sw1-rtr \
>> +    -- lsp-set-type sw1-rtr router  \
>> +    -- lsp-set-addresses sw1-rtr 00:00:00:00:01:00 \
>> +    -- lsp-set-options sw1-rtr router-port=rtr-sw1
>> +
>> +check ovn-nbctl lsp-add sw2 sw2-rtr \
>> +    -- lsp-set-type sw2-rtr router  \
>> +    -- lsp-set-addresses sw1-rtr 00:00:00:00:02:00 \
>> +    -- lsp-set-options sw2-rtr router-port=rtr-sw2
>> +
>> +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>> +#create IGMP_GROUP 239.0.1.68 with port sw1-p11
>> +ovn-sbctl create IGMP_GROUP address=239.0.1.68 \
>> +    datapath=$(fetch_column Datapath_Binding _uuid external_ids:name=sw1) \
>> +    chassis=$(fetch_column Chassis _uuid name=hv1) \
>> +    chassis_name=hv1 \
>> +    ports=$(fetch_column Port_Binding _uuid logical_port=sw1-p11)
>> +
>> +check ovn-nbctl --wait=sb sync
>> +wait_row_count IGMP_GROUP 1 address=239.0.1.68
>> +wait_row_count MULTICAST_GROUP 1 name="239.0.1.68"
>> +wait_row_count MULTICAST_GROUP  1 name="239.0.1.68" 
>> ports='[['$(fetch_column Port_Binding _uuid logical_port=sw1-p11)']]'
>> +check_recompute_counter 0
>> +CHECK_NO_CHANGE_AFTER_RECOMPUTE
>> +
>> +check ovn-nbctl set logical_router rtr \
>> +    options:mcast_relay="true"
>> +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>> +#update IGMP_GROUP 239.0.1.68 to include sw2-p21
>> +ovn-sbctl add igmp_group $(fetch_column IGMP_GROUP _uuid 
>> address=239.0.1.68) ports $(fetch_column Port_Binding _uuid 
>> logical_port=sw2-p21)
>> +
>> +ovn-nbctl --wait=sb sync
>> +wait_row_count IGMP_GROUP 1 address=239.0.1.68
>> +# a new multicast_group is created
>> +wait_row_count MULTICAST_GROUP 2 name=239.0.1.68
>> +check_recompute_counter 0
>> +CHECK_NO_CHANGE_AFTER_RECOMPUTE
>> +
>> +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats
>> +#delete IGMP_GROUP 239.0.1.68
>> +ovn-sbctl destroy igmp_group $(fetch_column IGMP_GROUP _uuid 
>> address=239.0.1.68)
>> +check ovn-nbctl --wait=sb sync
>> +check_recompute_counter 0
>> +CHECK_NO_CHANGE_AFTER_RECOMPUTE
>> +
>> +wait_row_count IGMP_GROUP 0 address=239.0.1.68
>> +wait_row_count MULTICAST_GROUP 0 name=239.0.1.68
>> +
>> +OVN_CLEANUP([hv1], [hv2])
>> +AT_CLEANUP
>> +])
>> +
>>  OVN_FOR_EACH_NORTHD_NO_HV([
>>  AT_SETUP([ACL/Meter incremental processing - no northd recompute])
>>  ovn_start
>> -- 
>> 2.47.1

Regards,
Dumitru

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to