On 8/25/23 18:55, Ilya Maximets wrote: > On 8/25/23 18:52, Mark Michelson wrote: >> Hi Ilya, >> >> I merged this change into main and branch-23.06. The conflicts were easy >> to resolve in branch-23.06, but when attempting to merge to 23.03, I was >> not as comfortable trying to resolve the conflicts. Could you please >> post a version of the patch that applies to 23.03 please? > > Sure, I'll do that. If not today, then on Monday.
Posted a backport here: https://patchwork.ozlabs.org/project/ovn/patch/[email protected]/ It should be applicable to all the branches down to 22.03. There were no significant changes during backport, mostly a struct northd_input change that caused conflicts. > > Best regards, Ilya Maximets. > >> >> Thanks, >> Mark Michelson >> >> On 8/24/23 15:02, Mark Michelson wrote: >>> Thanks Ilya, >>> >>> Acked-by: Mark Michelson <[email protected]> >>> >>> The topic of backporting this patch was brought up in the OVN >>> development IRC meeting today, and it was universally agreed there that >>> this performance issue is so bad that we should backport this fix when >>> merging. >>> >>> On 8/23/23 17:41, Ilya Maximets wrote: >>>> Current implementation of 'ref_chassis' recompute is horribly slow. >>>> For each port binding it finds LR group, then it adds a chassis of the >>>> current port binding to each HA chassis group of this LR group. >>>> >>>> The number of ports is much greater than number of chassis. And the >>>> number of distinct LR groups is even smaller than a number of chassis >>>> in a typical setup. Chances are, the setup only has one LR group. >>>> So, northd today is trying to add the same chassis to the same >>>> HA chassis group over and over again. >>>> >>>> In an OpenStack setup with ~20 chassis, 40K ports and 5K routers we >>>> end up with 33+ million calls to add_to_ha_ref_chassis_info() that >>>> also performs a linear search for duplicates leading to about 270 >>>> million operations, while we only have 5K Sb HA chassis groups with >>>> 15 ref_chassis in each. I.e. only 5000 * 15 = 75.000 operations out >>>> of 270 million are actually useful. >>>> >>>> In practice that leads to 80 second recompute: >>>> >>>> inc_proc_eng|INFO|node: sync_from_sb, recompute (forced) took 80074ms >>>> >>>> Fix that by accumulating unique chassis per unique LR groups first >>>> and then updating ref_chassis with these unique values avoiding any >>>> unnecessary calls. Result is ~1000x performance improvement: >>>> >>>> inc_proc_eng|DBG|node: sync_from_sb, recompute (forced) took 75ms >>>> >>>> Considering this as a performance bug, because it makes OVN practically >>>> unusable in certain (default?) OpenStack configurations at scale. >>>> >>>> Fixes: b82d351976a1 ("ovn: Add generic HA chassis group") >>>> Reported-at: >>>> https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052614.html >>>> Reported-by: Roberto Bartzen Acosta <[email protected]> >>>> Signed-off-by: Ilya Maximets <[email protected]> >>>> --- >>>> northd/northd.c | 118 ++++++++++++++++++++++++++++++------------------ >>>> 1 file changed, 75 insertions(+), 43 deletions(-) >>>> >>>> diff --git a/northd/northd.c b/northd/northd.c >>>> index 0a749931e..f5fb5e83a 100644 >>>> --- a/northd/northd.c >>>> +++ b/northd/northd.c >>>> @@ -799,6 +799,8 @@ struct lrouter_group { >>>> int n_router_dps; >>>> /* Set of ha_chassis_groups which are associated with the router >>>> dps. */ >>>> struct sset ha_chassis_groups; >>>> + /* Temporary storage for chassis references while computing HA >>>> groups. */ >>>> + struct hmapx tmp_ha_chassis; >>>> }; >>>> static struct ovn_datapath * >>>> @@ -8708,6 +8710,7 @@ build_lrouter_groups(struct hmap *lr_ports, >>>> struct ovs_list *lr_list) >>>> od->lr_group->router_dps[0] = od; >>>> od->lr_group->n_router_dps = 1; >>>> sset_init(&od->lr_group->ha_chassis_groups); >>>> + hmapx_init(&od->lr_group->tmp_ha_chassis); >>>> build_lrouter_groups__(lr_ports, od); >>>> } >>>> } >>>> @@ -17399,6 +17402,7 @@ destroy_datapaths_and_ports(struct >>>> ovn_datapaths *ls_datapaths, >>>> free(lr_group->router_dps); >>>> sset_destroy(&lr_group->ha_chassis_groups); >>>> + hmapx_destroy(&lr_group->tmp_ha_chassis); >>>> free(lr_group); >>>> } >>>> } >>>> @@ -17671,38 +17675,27 @@ ovnnb_db_run(struct northd_input *input_data, >>>> smap_destroy(&options); >>>> } >>>> -/* Stores the list of chassis which references an ha_chassis_group. >>>> +/* Stores the set of chassis which references an ha_chassis_group. >>>> */ >>>> struct ha_ref_chassis_info { >>>> const struct sbrec_ha_chassis_group *ha_chassis_group; >>>> - struct sbrec_chassis **ref_chassis; >>>> - size_t n_ref_chassis; >>>> - size_t free_slots; >>>> + struct hmapx ref_chassis; >>>> }; >>>> static void >>>> add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info, >>>> - const struct sbrec_chassis *chassis) >>>> + const struct hmapx *chassis) >>>> { >>>> - for (size_t j = 0; j < ref_ch_info->n_ref_chassis; j++) { >>>> - if (ref_ch_info->ref_chassis[j] == chassis) { >>>> - return; >>>> - } >>>> - } >>>> + if (!hmapx_count(&ref_ch_info->ref_chassis)) { >>>> + hmapx_destroy(&ref_ch_info->ref_chassis); >>>> + hmapx_clone(&ref_ch_info->ref_chassis, chassis); >>>> + } else { >>>> + struct hmapx_node *node; >>>> - /* Allocate space for 3 chassis at a time. */ >>>> - if (!ref_ch_info->free_slots) { >>>> - ref_ch_info->ref_chassis = >>>> - xrealloc(ref_ch_info->ref_chassis, >>>> - sizeof *ref_ch_info->ref_chassis * >>>> - (ref_ch_info->n_ref_chassis + 3)); >>>> - ref_ch_info->free_slots = 3; >>>> + HMAPX_FOR_EACH (node, chassis) { >>>> + hmapx_add(&ref_ch_info->ref_chassis, node->data); >>>> + } >>>> } >>>> - >>>> - ref_ch_info->ref_chassis[ref_ch_info->n_ref_chassis] = >>>> - CONST_CAST(struct sbrec_chassis *, chassis); >>>> - ref_ch_info->n_ref_chassis++; >>>> - ref_ch_info->free_slots--; >>>> } >>>> struct ha_chassis_group_node { >>>> @@ -17731,9 +17724,20 @@ update_sb_ha_group_ref_chassis( >>>> struct shash_node *node; >>>> SHASH_FOR_EACH_SAFE (node, ha_ref_chassis_map) { >>>> struct ha_ref_chassis_info *ha_ref_info = node->data; >>>> + size_t n = hmapx_count(&ha_ref_info->ref_chassis); >>>> + struct sbrec_chassis **ref_chassis; >>>> + struct hmapx_node *chassis_node; >>>> + >>>> + ref_chassis = xmalloc(n * sizeof *ref_chassis); >>>> + >>>> + n = 0; >>>> + HMAPX_FOR_EACH (chassis_node, &ha_ref_info->ref_chassis) { >>>> + ref_chassis[n++] = chassis_node->data; >>>> + } >>>> + >>>> >>>> sbrec_ha_chassis_group_set_ref_chassis(ha_ref_info->ha_chassis_group, >>>> - ha_ref_info->ref_chassis, >>>> - >>>> ha_ref_info->n_ref_chassis); >>>> + ref_chassis, n); >>>> + free(ref_chassis); >>>> /* Remove the updated group from the set. */ >>>> HMAP_FOR_EACH_WITH_HASH (ha_ch_grp_node, hmap_node, >>>> @@ -17745,7 +17749,7 @@ update_sb_ha_group_ref_chassis( >>>> break; >>>> } >>>> } >>>> - free(ha_ref_info->ref_chassis); >>>> + hmapx_destroy(&ha_ref_info->ref_chassis); >>>> free(ha_ref_info); >>>> shash_delete(ha_ref_chassis_map, node); >>>> } >>>> @@ -17782,10 +17786,9 @@ update_sb_ha_group_ref_chassis( >>>> * - 'ref_chassis' of hagrp1. >>>> */ >>>> static void >>>> -build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index >>>> *ha_ch_grp_by_name, >>>> - const struct sbrec_port_binding *sb, >>>> - struct ovn_port *op, >>>> - struct shash *ha_ref_chassis_map) >>>> +collect_lb_groups_for_ha_chassis_groups(const struct >>>> sbrec_port_binding *sb, >>>> + struct ovn_port *op, >>>> + struct hmapx *lr_groups) >>>> { >>>> struct lrouter_group *lr_group = NULL; >>>> for (size_t i = 0; i < op->od->n_router_ports; i++) { >>>> @@ -17804,18 +17807,39 @@ build_ha_chassis_group_ref_chassis(struct >>>> ovsdb_idl_index *ha_ch_grp_by_name, >>>> return; >>>> } >>>> - const char *ha_group_name; >>>> - SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) { >>>> - const struct sbrec_ha_chassis_group *sb_ha_chassis_grp; >>>> - sb_ha_chassis_grp = ha_chassis_group_lookup_by_name( >>>> - ha_ch_grp_by_name, ha_group_name); >>>> + hmapx_add(lr_groups, lr_group); >>>> + hmapx_add(&lr_group->tmp_ha_chassis, sb->chassis); >>>> +} >>>> + >>>> +static void >>>> +build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index >>>> *ha_ch_grp_by_name, >>>> + struct hmapx *lr_groups, >>>> + struct shash *ha_ref_chassis_map) >>>> +{ >>>> + struct hmapx_node *node; >>>> + >>>> + HMAPX_FOR_EACH (node, lr_groups) { >>>> + struct lrouter_group *lr_group = node->data; >>>> + const char *ha_group_name; >>>> + >>>> + SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) { >>>> + const struct sbrec_ha_chassis_group *sb_ha_chassis_grp; >>>> + >>>> + sb_ha_chassis_grp = ha_chassis_group_lookup_by_name( >>>> + ha_ch_grp_by_name, ha_group_name); >>>> + if (!sb_ha_chassis_grp) { >>>> + continue; >>>> + } >>>> - if (sb_ha_chassis_grp) { >>>> struct ha_ref_chassis_info *ref_ch_info = >>>> - shash_find_data(ha_ref_chassis_map, >>>> sb_ha_chassis_grp->name); >>>> + shash_find_data(ha_ref_chassis_map, >>>> sb_ha_chassis_grp->name); >>>> ovs_assert(ref_ch_info); >>>> - add_to_ha_ref_chassis_info(ref_ch_info, sb->chassis); >>>> + >>>> + add_to_ha_ref_chassis_info(ref_ch_info, >>>> &lr_group->tmp_ha_chassis); >>>> } >>>> + >>>> + hmapx_destroy(&lr_group->tmp_ha_chassis); >>>> + hmapx_init(&lr_group->tmp_ha_chassis); >>>> } >>>> } >>>> @@ -17830,15 +17854,19 @@ handle_port_binding_changes(struct >>>> ovsdb_idl_txn *ovnsb_txn, >>>> struct hmap *ls_ports, >>>> struct shash *ha_ref_chassis_map) >>>> { >>>> + struct hmapx lr_groups = HMAPX_INITIALIZER(&lr_groups); >>>> const struct sbrec_port_binding *sb; >>>> bool build_ha_chassis_ref = false; >>>> + >>>> if (ovnsb_txn) { >>>> const struct sbrec_ha_chassis_group *ha_ch_grp; >>>> SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_ch_grp, >>>> sb_ha_ch_grp_table) { >>>> if (ha_ch_grp->n_ha_chassis > 1) { >>>> - struct ha_ref_chassis_info *ref_ch_info = >>>> - xzalloc(sizeof *ref_ch_info); >>>> + struct ha_ref_chassis_info *ref_ch_info; >>>> + >>>> + ref_ch_info = xzalloc(sizeof *ref_ch_info); >>>> ref_ch_info->ha_chassis_group = ha_ch_grp; >>>> + hmapx_init(&ref_ch_info->ref_chassis); >>>> build_ha_chassis_ref = true; >>>> shash_add(ha_ref_chassis_map, ha_ch_grp->name, >>>> ref_ch_info); >>>> } >>>> @@ -17879,12 +17907,16 @@ handle_port_binding_changes(struct >>>> ovsdb_idl_txn *ovnsb_txn, >>>> } >>>> if (build_ha_chassis_ref && ovnsb_txn && sb->chassis) { >>>> - /* Check and add the chassis which has claimed this 'sb' >>>> - * to the ha chassis group's ref_chassis if required. */ >>>> - build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, >>>> sb, op, >>>> - ha_ref_chassis_map); >>>> + /* Check and collect the chassis which has claimed this 'sb' >>>> + * in relation to LR groups. */ >>>> + collect_lb_groups_for_ha_chassis_groups(sb, op, &lr_groups); >>>> } >>>> } >>>> + >>>> + /* Update ha chassis group's ref_chassis if required. */ >>>> + build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, &lr_groups, >>>> + ha_ref_chassis_map); >>>> + hmapx_destroy(&lr_groups); >>>> } >>>> /* Handle a fairly small set of changes in the southbound database. */ >>> >> > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
