On 12/20/24 7:34 PM, Lorenzo Bianconi wrote: >> To improve ovn-northd performance when processing changes to IGMP >> groups this patch implements a handler to process IGMP group changes >> instead of relying on a full database recompute. >> >> I tested using a script that is available here: >> https://gist.github.com/JacobTanenbaum/da70c75178bec2b8e1cf5487c74466e3 >> >> when running this script inside the OVN sandbox it creates sw0 and 5000 >> ports on sw0. It then adds one IGMP group with 1 port in it and waits >> for the databases to sync >> >> using upstream main >> >> Time spent on processing nb_cfg 2: >> ovn-northd delay before processing: 5ms >> ovn-northd completion: 5ms >> >> this is a typical output after adding the IGMP group waiting for the Sb >> to sync >> >> while using a branch with this patch: >> >> Time spent on processing nb_cfg 2: >> ovn-northd delay before processing: 1ms >> ovn-northd completion: 1ms >> >> this is the time that it takes to wait for the database to sync. >> >> Additionally the time it takes to add an IGMP group increases the more >> ports are on a switch from the current head of main while on a branch >> with this patch the time to add an IGMP group remains the same >> regardless of the size of the database (up to the 100000 ports on the >> logical switch that I have tested). > > Hi Jacob,
Hi Jacob, Lorenzo, Ales, > > nice patch :) It requires a trivial reabse and just few nit inline. > Other than that: > > Acked-by: Lorenzo Bianconi <[email protected]> > >> >> Signed-off-by: Jacob Tanenbaum <[email protected]> >> Suggested-by: Dumitru Ceara <[email protected]> >> Reported-at: https://issues.redhat.com/browse/FDP-756 >> >> --- >> v4: split into two patches >> removed checking if uuid in uuidset >> added more specific checks to testing >> changed else if to else for updates to igmp_groups >> separated the deletion code into it's own function >> v3: Removed incorrect call to ovn_igmp_group_destroy() >> v2: Rebase on top of latest main. >> Fix memory leaks detected by CI. >> Capitalize first letter of the subject line >> Fix typos in commit message >> >> diff --git a/northd/en-lflow.c b/northd/en-lflow.c >> index fa1f0236d..6c3e20695 100644 >> --- a/northd/en-lflow.c >> +++ b/northd/en-lflow.c >> @@ -233,3 +233,20 @@ void en_lflow_cleanup(void *data_) >> struct lflow_data *data = data_; >> lflow_table_destroy(data->lflow_table); >> } >> + > > [...] > >> >> +/* handle changes to the sbrec_igmp_group_table in the incremental >> + * processor >> + */ >> +bool >> +handle_igmp_change(struct ovsdb_idl_txn *ovnsb_txn, >> + const struct sbrec_igmp_group_table >> *sbrec_igmp_group_table, >> + struct lflow_input *input_data, >> + struct lflow_table *lflows) >> +{ >> + struct hmap igmp_groups = HMAP_INITIALIZER(&igmp_groups); >> + struct hmap mcast_groups = HMAP_INITIALIZER(&mcast_groups); >> + struct ds match = DS_EMPTY_INITIALIZER; >> + struct ds actions = DS_EMPTY_INITIALIZER; >> + >> + struct uuidset datapaths_to_sync = >> + UUIDSET_INITIALIZER(&datapaths_to_sync); >> + >> + const struct sbrec_igmp_group *sb_igmp; >> + SBREC_IGMP_GROUP_TABLE_FOR_EACH_TRACKED (sb_igmp, >> sbrec_igmp_group_table) { >> + struct ovn_datapath *od = >> + ovn_datapath_from_sbrec(&input_data->ls_datapaths->datapaths, >> + &input_data->lr_datapaths->datapaths, >> + sb_igmp->datapath); > > Theoretically ovn_datapath_from_sbrec() can return a NULL pointer. Is it safe > here to assume od is always not NULL? > Nice catch! If the datapath is deleted in the same transaction as the IGMP group, ovn_datapath_from_sbrec() will return NULL here. Would this work? --- a/northd/northd.c +++ b/northd/northd.c @@ -19498,6 +19498,13 @@ handle_igmp_change(struct ovsdb_idl_txn *ovnsb_txn, ovn_datapath_from_sbrec(&input_data->ls_datapaths->datapaths, &input_data->lr_datapaths->datapaths, sb_igmp->datapath); + if (!od) { + /* Deletion of the datapath can happen at the same time as + * the deletion of the IGMP_Group. Logical switch/datapath + * deletion triggers a recompute of the en_lflow node, so + * skip any IGMP changes for those datapaths. */ + continue; + } /* Processing new IGMP group. */ if (sbrec_igmp_group_is_new(sb_igmp)) { >> + /* Processing new IGMP group. */ >> + if (sbrec_igmp_group_is_new(sb_igmp)) { >> + Nit: no empty line needed IMO. >> + build_igmp_group_for_sb(sb_igmp, >> + od, >> + >> input_data->sbrec_mcast_group_by_name_dp, >> + input_data->ls_ports, &igmp_groups); >> + >> + Nit: no empty lines needed IMO. >> + build_igmp_router_entries(od, >> + >> input_data->sbrec_mcast_group_by_name_dp, >> + &igmp_groups); >> + >> + /* Processing a deleted IGMP group. */ >> + } else if (sbrec_igmp_group_is_deleted(sb_igmp)) { >> + delete_igmp_group(sb_igmp, >> + od, >> + input_data->sbrec_mcast_group_by_name_dp, >> + lflows, >> + &datapaths_to_sync, >> + &actions, >> + &match); >> + /* Processing an updated IGMP group. */ >> + } else { >> + build_igmp_group_for_sb(sb_igmp, >> + od, >> + >> input_data->sbrec_mcast_group_by_name_dp, >> + input_data->ls_ports, &igmp_groups); >> + >> + /* Do not need to worry about building IGMP groups for multicast >> + * routers with relay enabled. Updating the IGMP group does not >> + * make any changes applicable to them. >> + */ >> + } >> + } >> + Nit: too many empty lines. >> + >> + struct ovn_igmp_group *igmp_group; >> + HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, &igmp_groups) { >> + if (igmp_group->datapath->nbr) { >> + /* For every router datapath each port of the peer switches >> + * need to be checked if they flood multicast. >> + */ >> + process_peer_switches_for_igmp(igmp_group, >> + &datapaths_to_sync, >> + &mcast_groups, >> + lflows, >> + &actions, >> + input_data->meter_groups); >> + } >> + build_mcast_group_from_igmp_group(igmp_group, >> + &mcast_groups, >> + &igmp_groups); >> + } >> + >> + /* After all the routers have been processed the flows for the >> + * the switches can be created */ >> + HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, &igmp_groups) { >> + build_lswitch_ip_mcast_igmp_mld(igmp_group, lflows, &actions, >> &match); >> + >> + uuidset_insert(&datapaths_to_sync, &igmp_group->datapath->key); >> + } >> + >> + struct ovn_multicast *mc; >> + HMAP_FOR_EACH_SAFE (mc, hmap_node, &mcast_groups) { >> + const struct sbrec_multicast_group *sbmc = >> + mcast_group_lookup(input_data->sbrec_mcast_group_by_name_dp, >> + mc->group->name, mc->datapath->sb); >> + if (sbmc) { >> + for (size_t i = 0; i < mc->n_ports; i++) { >> + sbrec_multicast_group_update_ports_addvalue(sbmc, >> + >> mc->ports[i]->sb); >> + } >> + } else { >> + sbmc = create_sb_multicast_group(ovnsb_txn, mc->datapath->sb, >> + mc->group->name, >> mc->group->key); >> + ovn_multicast_update_sbrec(mc, sbmc); >> + } >> + if (mc->datapath->nbr) { >> + build_mcast_lookup_flows_for_lrouter(mc->datapath, >> + lflows, >> + &match, >> + &actions, >> + >> mc->datapath->igmp_lflow_ref); >> + >> + uuidset_insert(&datapaths_to_sync, &mc->datapath->key); >> + } >> + ovn_multicast_destroy(&mcast_groups, mc); >> + } >> + struct uuidset_node *uuidnode; >> + UUIDSET_FOR_EACH (uuidnode, &datapaths_to_sync) { >> + const struct ovn_datapath *dp = >> + ovn_datapath_find(&input_data->ls_datapaths->datapaths, >> + &uuidnode->uuid); >> + if (!dp) { >> + dp = ovn_datapath_find(&input_data->lr_datapaths->datapaths, >> + &uuidnode->uuid); >> + } > > same here, is dp always not NULL? > With the incremental diff above, this can't happen anymore. >> + >> + lflow_ref_sync_lflows(dp->igmp_lflow_ref, >> + lflows, ovnsb_txn, >> + input_data->ls_datapaths, >> + input_data->lr_datapaths, >> + input_data->ovn_internal_version_changed, >> + input_data->sbrec_logical_flow_table, >> + input_data->sbrec_logical_dp_group_table); >> + } >> + >> + ds_destroy(&match); >> + ds_destroy(&actions); >> + >> + /* It is required to cleanup igmp_groups after multicast_groups */ >> + HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, &igmp_groups) { >> + ovn_igmp_group_destroy(&igmp_groups, igmp_group); >> + } >> + hmap_destroy(&igmp_groups); >> + hmap_destroy(&mcast_groups); >> + uuidset_destroy(&datapaths_to_sync); >> + >> + return true; >> +} >> + >> +/* Process the deletion of an IGMP group. We only need to remove the >> + * multicast_groups associated with the datapath the IGMP group is on and >> the >> + */ >> +static void >> +delete_igmp_group(const struct sbrec_igmp_group *sb_igmp, >> + struct ovn_datapath *od, >> + struct ovsdb_idl_index *sbrec_mcast_group_by_name_dp, >> + struct lflow_table *lflows, >> + struct uuidset *datapaths_to_sync, >> + struct ds *actions, >> + struct ds *match) >> +{ >> + /* The igmp group for multicast routers does not have a >> + * multicast group, skip the deletion >> + */ >> + if (!strcmp(sb_igmp->address, OVN_IGMP_GROUP_MROUTERS)) { >> + return; >> + } >> + >> + for (size_t i = 0; i < od->n_router_ports; i++) { >> + struct ovn_port *port = od->router_ports[i]; >> + lflow_ref_unlink_lflows(port->peer->od->igmp_lflow_ref); >> + build_mcast_lookup_flows_for_lrouter(od->router_ports[i]->peer->od, >> + lflows, >> + match, >> + actions, >> + >> port->peer->od->igmp_lflow_ref); This part is not correct in my opinion. If we happen to be deleting two IGMP records for the same datapath then we'll call delete_igmp_group() twice (once for each record). The second call will unlink any flows the first call might have added. The same comment applies to process_peer_switches_for_igmp(). I guess the actual problem is that we can't have only a lflow-ref per datapath. Instead we should probably also have a lflow_ref per IGMP group. If an IGMP group is updated what we want is to: - delete logical flows corresponding to _that IGMP group_ without touching others - add logical flows to implement the functionality for the new IGMP group contents >> + >> + uuidset_insert(datapaths_to_sync, >> &od->router_ports[i]->peer->od->key); >> + const struct sbrec_multicast_group *sbmc = >> + mcast_group_lookup(sbrec_mcast_group_by_name_dp, >> + sb_igmp->address, port->peer->od->sb); >> + if (sbmc) { >> + sbrec_multicast_group_delete(sbmc); >> + } >> + } >> + const struct sbrec_multicast_group *sbmc = >> + mcast_group_lookup(sbrec_mcast_group_by_name_dp, >> + sb_igmp->address, sb_igmp->datapath); >> + int removedPorts = 0; > > indentation has something weird here. Moreover, please avoid CamelCase. > I fixed it up and changed the variable name to 'removed_ports'. >> + for (size_t i = 0; i <sb_igmp->n_ports; i++) { >> + >> + if (smap_get_bool(&sb_igmp->ports[i]->options, >> + "mcast_flood", >> + false)) { >> + /* If the port has mcast_flood set it is not in >> + * the multicast_group for this igmp_group >> + */ >> + continue; >> + } >> + sbrec_multicast_group_update_ports_delvalue(sbmc, >> + sb_igmp->ports[i]); >> + removedPorts++; > > same here. > Here too. > Regards, > Lorenzo > >> + } >> + if (sb_igmp->n_ports >= sbmc->n_ports) { >> + lflow_ref_unlink_lflows(od->igmp_lflow_ref); >> + uuidset_insert(datapaths_to_sync, &od->key); >> + } >> + /* if the deletion removed all the ports from the >> + * multicast_group delete it from the database >> + */ >> + if (removedPorts == sbmc->n_ports) { >> + sbrec_multicast_group_delete(sbmc); >> + } >> +} >> + >> static void >> build_igmp_group_for_sb(const struct sbrec_igmp_group *sb_igmp, >> struct ovn_datapath *od, >> @@ -19452,3 +19682,42 @@ build_mcast_group_from_igmp_group(struct >> ovn_igmp_group *igmp_group, >> */ >> ovn_igmp_group_aggregate_ports(igmp_group, mcast_groups); >> } >> + >> +/* For a given igmp_group check all the peer switches on the igmp_groups >> + * datapath for ports with mcast_info.flood set, If a port does >> + * mcast_info.flood set add it to the MC_STATIC group and prepare to update >> the >> + * datapaths logical flows. >> + */ >> +static void >> +process_peer_switches_for_igmp(struct ovn_igmp_group *igmp_group, >> + struct uuidset *datapaths_to_sync, >> + struct hmap *mcast_groups, >> + struct lflow_table *lflows, >> + struct ds *actions, >> + const struct shash *meter_groups) >> +{ >> + >> + for (size_t i = 0; i < igmp_group->datapath->n_ls_peers; i++) { >> + struct ovn_datapath *ls_peer = igmp_group->datapath->ls_peers[i]; >> + struct ovn_port *op; >> + >> + HMAP_FOR_EACH_SAFE (op, dp_node, &ls_peer->ports) { >> + /* If this port is configured to always flood multicast traffic >> + * add it to the MC_STATIC group. >> + */ >> + if (op->mcast_info.flood) { >> + ovn_multicast_add(mcast_groups, &mc_static, op); >> + op->od->mcast_info.sw.flood_static = true; >> + >> + lflow_ref_unlink_lflows(op->od->igmp_lflow_ref); >> + build_lswitch_destination_lookup_bmcast(op->od, >> + lflows, >> + actions, >> + meter_groups, >> + NULL); >> + >> + uuidset_insert(datapaths_to_sync, &op->od->key); >> + } >> + } >> + } >> +} >> diff --git a/northd/northd.h b/northd/northd.h >> index d60c944df..7987105ac 100644 >> --- a/northd/northd.h >> +++ b/northd/northd.h >> @@ -375,6 +375,7 @@ struct ovn_datapath { >> /* The logical router group to which this datapath belongs. >> * Valid only if it is logical router datapath. NULL otherwise. */ >> struct lrouter_group *lr_group; >> + struct lflow_ref *igmp_lflow_ref; As described above, I think we also need one lflow_ref per group. Otherwise we clear datapath specific IGMP lflows whenever an IGMP group is updated/deleted. >> >> /* Map of ovn_port objects belonging to this datapath. >> * This map doesn't include derived ports. */ >> @@ -862,4 +863,10 @@ is_vxlan_mode(const struct smap *nb_options, >> >> uint32_t get_ovn_max_dp_key_local(bool _vxlan_mode); >> >> +bool >> +handle_igmp_change(struct ovsdb_idl_txn *ovnsb_txn, >> + const struct sbrec_igmp_group_table >> *sbrec_igmp_group_table, >> + struct lflow_input *input_data, >> + struct lflow_table *lflows); >> + >> #endif /* NORTHD_H */ >> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at >> index 4eae1c67c..06385f515 100644 >> --- a/tests/ovn-northd.at >> +++ b/tests/ovn-northd.at >> @@ -14,6 +14,7 @@ m4_define([_DUMP_DB_TABLES], [ >> ovn-sbctl list port_group >> $1 >> ovn-sbctl list bfd >> $1 >> ovn-sbctl dump-flows > lflows_$1 >> + ovn-sbctl list multicast_group > mgroup_$1 >> ]) >> >> # CHECK_NO_CHANGE_AFTER_RECOMPUTE >> @@ -10949,6 +10950,92 @@ OVN_CLEANUP([hv1]) >> AT_CLEANUP >> ]) >> >> +OVN_FOR_EACH_NORTHD_NO_HV([ >> +AT_SETUP([IGMP incremental processing]) >> + >> +check_recompute_counter() { >> + lflow_recomp=$(as northd ovn-appctl -t ovn-northd inc-engine/show-stats >> lflow recompute) >> + AT_CHECK([test x$lflow_recomp = x$1]) >> +} >> +ovn_start >> + >> +net_add n1 >> +sim_add hv1 >> +as hv1 >> + >> +ovs-vsctl add-br br-phys >> +ovn_attach n1 br-phys 192.168.0.11 >> + >> +sim_add hv2 >> +as hv2 >> + >> +check ovs-vsctl add-br br-phys >> +ovn_attach n1 br-phys 192.168.0.2 >> + >> +check ovn-nbctl ls-add sw1 >> +check ovn-nbctl ls-add sw2 >> + >> +check ovn-nbctl lsp-add sw1 sw1-p11 >> +check ovn-nbctl lsp-add sw2 sw2-p21 >> +#check ovn-nbctl set Logical_Switch_Port sw2-p21 options:mcast_flood='true' >> + >> +check ovn-nbctl lr-add rtr >> +check ovn-nbctl lrp-add rtr rtr-sw1 00:00:00:00:01:00 10.0.0.254/24 >> +check ovn-nbctl lrp-add rtr rtr-sw2 00:00:00:00:02:00 10.0.0.254/24 >> + >> + >> +check ovn-nbctl lsp-add sw1 sw1-rtr \ >> + -- lsp-set-type sw1-rtr router \ >> + -- lsp-set-addresses sw1-rtr 00:00:00:00:01:00 \ >> + -- lsp-set-options sw1-rtr router-port=rtr-sw1 >> + >> +check ovn-nbctl lsp-add sw2 sw2-rtr \ >> + -- lsp-set-type sw2-rtr router \ >> + -- lsp-set-addresses sw1-rtr 00:00:00:00:02:00 \ >> + -- lsp-set-options sw2-rtr router-port=rtr-sw2 >> + >> +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats >> +#create IGMP_GROUP 239.0.1.68 with port sw1-p11 >> +ovn-sbctl create IGMP_GROUP address=239.0.1.68 \ >> + datapath=$(fetch_column Datapath_Binding _uuid external_ids:name=sw1) \ >> + chassis=$(fetch_column Chassis _uuid name=hv1) \ >> + chassis_name=hv1 \ >> + ports=$(fetch_column Port_Binding _uuid logical_port=sw1-p11) >> + >> +check ovn-nbctl --wait=sb sync >> +wait_row_count IGMP_GROUP 1 address=239.0.1.68 >> +wait_row_count MULTICAST_GROUP 1 name="239.0.1.68" >> +wait_row_count MULTICAST_GROUP 1 name="239.0.1.68" >> ports='[['$(fetch_column Port_Binding _uuid logical_port=sw1-p11)']]' >> +check_recompute_counter 0 >> +CHECK_NO_CHANGE_AFTER_RECOMPUTE >> + >> +check ovn-nbctl set logical_router rtr \ >> + options:mcast_relay="true" >> +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats >> +#update IGMP_GROUP 239.0.1.68 to include sw2-p21 >> +ovn-sbctl add igmp_group $(fetch_column IGMP_GROUP _uuid >> address=239.0.1.68) ports $(fetch_column Port_Binding _uuid >> logical_port=sw2-p21) >> + >> +ovn-nbctl --wait=sb sync >> +wait_row_count IGMP_GROUP 1 address=239.0.1.68 >> +# a new multicast_group is created >> +wait_row_count MULTICAST_GROUP 2 name=239.0.1.68 >> +check_recompute_counter 0 >> +CHECK_NO_CHANGE_AFTER_RECOMPUTE >> + >> +check as northd ovn-appctl -t ovn-northd inc-engine/clear-stats >> +#delete IGMP_GROUP 239.0.1.68 >> +ovn-sbctl destroy igmp_group $(fetch_column IGMP_GROUP _uuid >> address=239.0.1.68) >> +check ovn-nbctl --wait=sb sync >> +check_recompute_counter 0 >> +CHECK_NO_CHANGE_AFTER_RECOMPUTE >> + >> +wait_row_count IGMP_GROUP 0 address=239.0.1.68 >> +wait_row_count MULTICAST_GROUP 0 name=239.0.1.68 >> + >> +OVN_CLEANUP([hv1], [hv2]) >> +AT_CLEANUP >> +]) >> + >> OVN_FOR_EACH_NORTHD_NO_HV([ >> AT_SETUP([ACL/Meter incremental processing - no northd recompute]) >> ovn_start >> -- >> 2.47.1 Regards, Dumitru _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
