On 8/25/23 18:52, Mark Michelson wrote: > Hi Ilya, > > I merged this change into main and branch-23.06. The conflicts were easy > to resolve in branch-23.06, but when attempting to merge to 23.03, I was > not as comfortable trying to resolve the conflicts. Could you please > post a version of the patch that applies to 23.03 please?
Sure, I'll do that. If not today, then on Monday. Best regards, Ilya Maximets. > > Thanks, > Mark Michelson > > On 8/24/23 15:02, Mark Michelson wrote: >> Thanks Ilya, >> >> Acked-by: Mark Michelson <[email protected]> >> >> The topic of backporting this patch was brought up in the OVN >> development IRC meeting today, and it was universally agreed there that >> this performance issue is so bad that we should backport this fix when >> merging. >> >> On 8/23/23 17:41, Ilya Maximets wrote: >>> Current implementation of 'ref_chassis' recompute is horribly slow. >>> For each port binding it finds LR group, then it adds a chassis of the >>> current port binding to each HA chassis group of this LR group. >>> >>> The number of ports is much greater than number of chassis. And the >>> number of distinct LR groups is even smaller than a number of chassis >>> in a typical setup. Chances are, the setup only has one LR group. >>> So, northd today is trying to add the same chassis to the same >>> HA chassis group over and over again. >>> >>> In an OpenStack setup with ~20 chassis, 40K ports and 5K routers we >>> end up with 33+ million calls to add_to_ha_ref_chassis_info() that >>> also performs a linear search for duplicates leading to about 270 >>> million operations, while we only have 5K Sb HA chassis groups with >>> 15 ref_chassis in each. I.e. only 5000 * 15 = 75.000 operations out >>> of 270 million are actually useful. >>> >>> In practice that leads to 80 second recompute: >>> >>> inc_proc_eng|INFO|node: sync_from_sb, recompute (forced) took 80074ms >>> >>> Fix that by accumulating unique chassis per unique LR groups first >>> and then updating ref_chassis with these unique values avoiding any >>> unnecessary calls. Result is ~1000x performance improvement: >>> >>> inc_proc_eng|DBG|node: sync_from_sb, recompute (forced) took 75ms >>> >>> Considering this as a performance bug, because it makes OVN practically >>> unusable in certain (default?) OpenStack configurations at scale. >>> >>> Fixes: b82d351976a1 ("ovn: Add generic HA chassis group") >>> Reported-at: >>> https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052614.html >>> Reported-by: Roberto Bartzen Acosta <[email protected]> >>> Signed-off-by: Ilya Maximets <[email protected]> >>> --- >>> northd/northd.c | 118 ++++++++++++++++++++++++++++++------------------ >>> 1 file changed, 75 insertions(+), 43 deletions(-) >>> >>> diff --git a/northd/northd.c b/northd/northd.c >>> index 0a749931e..f5fb5e83a 100644 >>> --- a/northd/northd.c >>> +++ b/northd/northd.c >>> @@ -799,6 +799,8 @@ struct lrouter_group { >>> int n_router_dps; >>> /* Set of ha_chassis_groups which are associated with the router >>> dps. */ >>> struct sset ha_chassis_groups; >>> + /* Temporary storage for chassis references while computing HA >>> groups. */ >>> + struct hmapx tmp_ha_chassis; >>> }; >>> static struct ovn_datapath * >>> @@ -8708,6 +8710,7 @@ build_lrouter_groups(struct hmap *lr_ports, >>> struct ovs_list *lr_list) >>> od->lr_group->router_dps[0] = od; >>> od->lr_group->n_router_dps = 1; >>> sset_init(&od->lr_group->ha_chassis_groups); >>> + hmapx_init(&od->lr_group->tmp_ha_chassis); >>> build_lrouter_groups__(lr_ports, od); >>> } >>> } >>> @@ -17399,6 +17402,7 @@ destroy_datapaths_and_ports(struct >>> ovn_datapaths *ls_datapaths, >>> free(lr_group->router_dps); >>> sset_destroy(&lr_group->ha_chassis_groups); >>> + hmapx_destroy(&lr_group->tmp_ha_chassis); >>> free(lr_group); >>> } >>> } >>> @@ -17671,38 +17675,27 @@ ovnnb_db_run(struct northd_input *input_data, >>> smap_destroy(&options); >>> } >>> -/* Stores the list of chassis which references an ha_chassis_group. >>> +/* Stores the set of chassis which references an ha_chassis_group. >>> */ >>> struct ha_ref_chassis_info { >>> const struct sbrec_ha_chassis_group *ha_chassis_group; >>> - struct sbrec_chassis **ref_chassis; >>> - size_t n_ref_chassis; >>> - size_t free_slots; >>> + struct hmapx ref_chassis; >>> }; >>> static void >>> add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info, >>> - const struct sbrec_chassis *chassis) >>> + const struct hmapx *chassis) >>> { >>> - for (size_t j = 0; j < ref_ch_info->n_ref_chassis; j++) { >>> - if (ref_ch_info->ref_chassis[j] == chassis) { >>> - return; >>> - } >>> - } >>> + if (!hmapx_count(&ref_ch_info->ref_chassis)) { >>> + hmapx_destroy(&ref_ch_info->ref_chassis); >>> + hmapx_clone(&ref_ch_info->ref_chassis, chassis); >>> + } else { >>> + struct hmapx_node *node; >>> - /* Allocate space for 3 chassis at a time. */ >>> - if (!ref_ch_info->free_slots) { >>> - ref_ch_info->ref_chassis = >>> - xrealloc(ref_ch_info->ref_chassis, >>> - sizeof *ref_ch_info->ref_chassis * >>> - (ref_ch_info->n_ref_chassis + 3)); >>> - ref_ch_info->free_slots = 3; >>> + HMAPX_FOR_EACH (node, chassis) { >>> + hmapx_add(&ref_ch_info->ref_chassis, node->data); >>> + } >>> } >>> - >>> - ref_ch_info->ref_chassis[ref_ch_info->n_ref_chassis] = >>> - CONST_CAST(struct sbrec_chassis *, chassis); >>> - ref_ch_info->n_ref_chassis++; >>> - ref_ch_info->free_slots--; >>> } >>> struct ha_chassis_group_node { >>> @@ -17731,9 +17724,20 @@ update_sb_ha_group_ref_chassis( >>> struct shash_node *node; >>> SHASH_FOR_EACH_SAFE (node, ha_ref_chassis_map) { >>> struct ha_ref_chassis_info *ha_ref_info = node->data; >>> + size_t n = hmapx_count(&ha_ref_info->ref_chassis); >>> + struct sbrec_chassis **ref_chassis; >>> + struct hmapx_node *chassis_node; >>> + >>> + ref_chassis = xmalloc(n * sizeof *ref_chassis); >>> + >>> + n = 0; >>> + HMAPX_FOR_EACH (chassis_node, &ha_ref_info->ref_chassis) { >>> + ref_chassis[n++] = chassis_node->data; >>> + } >>> + >>> >>> sbrec_ha_chassis_group_set_ref_chassis(ha_ref_info->ha_chassis_group, >>> - ha_ref_info->ref_chassis, >>> - >>> ha_ref_info->n_ref_chassis); >>> + ref_chassis, n); >>> + free(ref_chassis); >>> /* Remove the updated group from the set. */ >>> HMAP_FOR_EACH_WITH_HASH (ha_ch_grp_node, hmap_node, >>> @@ -17745,7 +17749,7 @@ update_sb_ha_group_ref_chassis( >>> break; >>> } >>> } >>> - free(ha_ref_info->ref_chassis); >>> + hmapx_destroy(&ha_ref_info->ref_chassis); >>> free(ha_ref_info); >>> shash_delete(ha_ref_chassis_map, node); >>> } >>> @@ -17782,10 +17786,9 @@ update_sb_ha_group_ref_chassis( >>> * - 'ref_chassis' of hagrp1. >>> */ >>> static void >>> -build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index >>> *ha_ch_grp_by_name, >>> - const struct sbrec_port_binding *sb, >>> - struct ovn_port *op, >>> - struct shash *ha_ref_chassis_map) >>> +collect_lb_groups_for_ha_chassis_groups(const struct >>> sbrec_port_binding *sb, >>> + struct ovn_port *op, >>> + struct hmapx *lr_groups) >>> { >>> struct lrouter_group *lr_group = NULL; >>> for (size_t i = 0; i < op->od->n_router_ports; i++) { >>> @@ -17804,18 +17807,39 @@ build_ha_chassis_group_ref_chassis(struct >>> ovsdb_idl_index *ha_ch_grp_by_name, >>> return; >>> } >>> - const char *ha_group_name; >>> - SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) { >>> - const struct sbrec_ha_chassis_group *sb_ha_chassis_grp; >>> - sb_ha_chassis_grp = ha_chassis_group_lookup_by_name( >>> - ha_ch_grp_by_name, ha_group_name); >>> + hmapx_add(lr_groups, lr_group); >>> + hmapx_add(&lr_group->tmp_ha_chassis, sb->chassis); >>> +} >>> + >>> +static void >>> +build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index >>> *ha_ch_grp_by_name, >>> + struct hmapx *lr_groups, >>> + struct shash *ha_ref_chassis_map) >>> +{ >>> + struct hmapx_node *node; >>> + >>> + HMAPX_FOR_EACH (node, lr_groups) { >>> + struct lrouter_group *lr_group = node->data; >>> + const char *ha_group_name; >>> + >>> + SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) { >>> + const struct sbrec_ha_chassis_group *sb_ha_chassis_grp; >>> + >>> + sb_ha_chassis_grp = ha_chassis_group_lookup_by_name( >>> + ha_ch_grp_by_name, ha_group_name); >>> + if (!sb_ha_chassis_grp) { >>> + continue; >>> + } >>> - if (sb_ha_chassis_grp) { >>> struct ha_ref_chassis_info *ref_ch_info = >>> - shash_find_data(ha_ref_chassis_map, >>> sb_ha_chassis_grp->name); >>> + shash_find_data(ha_ref_chassis_map, >>> sb_ha_chassis_grp->name); >>> ovs_assert(ref_ch_info); >>> - add_to_ha_ref_chassis_info(ref_ch_info, sb->chassis); >>> + >>> + add_to_ha_ref_chassis_info(ref_ch_info, >>> &lr_group->tmp_ha_chassis); >>> } >>> + >>> + hmapx_destroy(&lr_group->tmp_ha_chassis); >>> + hmapx_init(&lr_group->tmp_ha_chassis); >>> } >>> } >>> @@ -17830,15 +17854,19 @@ handle_port_binding_changes(struct >>> ovsdb_idl_txn *ovnsb_txn, >>> struct hmap *ls_ports, >>> struct shash *ha_ref_chassis_map) >>> { >>> + struct hmapx lr_groups = HMAPX_INITIALIZER(&lr_groups); >>> const struct sbrec_port_binding *sb; >>> bool build_ha_chassis_ref = false; >>> + >>> if (ovnsb_txn) { >>> const struct sbrec_ha_chassis_group *ha_ch_grp; >>> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_ch_grp, >>> sb_ha_ch_grp_table) { >>> if (ha_ch_grp->n_ha_chassis > 1) { >>> - struct ha_ref_chassis_info *ref_ch_info = >>> - xzalloc(sizeof *ref_ch_info); >>> + struct ha_ref_chassis_info *ref_ch_info; >>> + >>> + ref_ch_info = xzalloc(sizeof *ref_ch_info); >>> ref_ch_info->ha_chassis_group = ha_ch_grp; >>> + hmapx_init(&ref_ch_info->ref_chassis); >>> build_ha_chassis_ref = true; >>> shash_add(ha_ref_chassis_map, ha_ch_grp->name, >>> ref_ch_info); >>> } >>> @@ -17879,12 +17907,16 @@ handle_port_binding_changes(struct >>> ovsdb_idl_txn *ovnsb_txn, >>> } >>> if (build_ha_chassis_ref && ovnsb_txn && sb->chassis) { >>> - /* Check and add the chassis which has claimed this 'sb' >>> - * to the ha chassis group's ref_chassis if required. */ >>> - build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, >>> sb, op, >>> - ha_ref_chassis_map); >>> + /* Check and collect the chassis which has claimed this 'sb' >>> + * in relation to LR groups. */ >>> + collect_lb_groups_for_ha_chassis_groups(sb, op, &lr_groups); >>> } >>> } >>> + >>> + /* Update ha chassis group's ref_chassis if required. */ >>> + build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, &lr_groups, >>> + ha_ref_chassis_map); >>> + hmapx_destroy(&lr_groups); >>> } >>> /* Handle a fairly small set of changes in the southbound database. */ >> > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
