On 8/25/23 18:55, Ilya Maximets wrote:
> On 8/25/23 18:52, Mark Michelson wrote:
>> Hi Ilya,
>>
>> I merged this change into main and branch-23.06. The conflicts were easy 
>> to resolve in branch-23.06, but when attempting to merge to 23.03, I was 
>> not as comfortable trying to resolve the conflicts. Could you please 
>> post a version of the patch that applies to 23.03 please?
> 
> Sure, I'll do that.  If not today, then on Monday.

Posted a backport here:
  
https://patchwork.ozlabs.org/project/ovn/patch/[email protected]/

It should be applicable to all the branches down to 22.03.
There were no significant changes during backport, mostly
a struct northd_input change that caused conflicts.

> 
> Best regards, Ilya Maximets.
> 
>>
>> Thanks,
>> Mark Michelson
>>
>> On 8/24/23 15:02, Mark Michelson wrote:
>>> Thanks Ilya,
>>>
>>> Acked-by: Mark Michelson <[email protected]>
>>>
>>> The topic of backporting this patch was brought up in the OVN 
>>> development IRC meeting today, and it was universally agreed there that 
>>> this performance issue is so bad that we should backport this fix when 
>>> merging.
>>>
>>> On 8/23/23 17:41, Ilya Maximets wrote:
>>>> Current implementation of 'ref_chassis' recompute is horribly slow.
>>>> For each port binding it finds LR group, then it adds a chassis of the
>>>> current port binding to each HA chassis group of this LR group.
>>>>
>>>> The number of ports is much greater than number of chassis.  And the
>>>> number of distinct LR groups is even smaller than a number of chassis
>>>> in a typical setup.  Chances are, the setup only has one LR group.
>>>> So, northd today is trying to add the same chassis to the same
>>>> HA chassis group over and over again.
>>>>
>>>> In an OpenStack setup with ~20 chassis, 40K ports and 5K routers we
>>>> end up with 33+ million calls to add_to_ha_ref_chassis_info() that
>>>> also performs a linear search for duplicates leading to about 270
>>>> million operations, while we only have 5K Sb HA chassis groups with
>>>> 15 ref_chassis in each.  I.e. only 5000 * 15 = 75.000 operations out
>>>> of 270 million are actually useful.
>>>>
>>>> In practice that leads to 80 second recompute:
>>>>
>>>>    inc_proc_eng|INFO|node: sync_from_sb, recompute (forced) took 80074ms
>>>>
>>>> Fix that by accumulating unique chassis per unique LR groups first
>>>> and then updating ref_chassis with these unique values avoiding any
>>>> unnecessary calls.  Result is ~1000x performance improvement:
>>>>
>>>>    inc_proc_eng|DBG|node: sync_from_sb, recompute (forced) took 75ms
>>>>
>>>> Considering this as a performance bug, because it makes OVN practically
>>>> unusable in certain (default?) OpenStack configurations at scale.
>>>>
>>>> Fixes: b82d351976a1 ("ovn: Add generic HA chassis group")
>>>> Reported-at: 
>>>> https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052614.html
>>>> Reported-by: Roberto Bartzen Acosta <[email protected]>
>>>> Signed-off-by: Ilya Maximets <[email protected]>
>>>> ---
>>>>   northd/northd.c | 118 ++++++++++++++++++++++++++++++------------------
>>>>   1 file changed, 75 insertions(+), 43 deletions(-)
>>>>
>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>> index 0a749931e..f5fb5e83a 100644
>>>> --- a/northd/northd.c
>>>> +++ b/northd/northd.c
>>>> @@ -799,6 +799,8 @@ struct lrouter_group {
>>>>       int n_router_dps;
>>>>       /* Set of ha_chassis_groups which are associated with the router 
>>>> dps. */
>>>>       struct sset ha_chassis_groups;
>>>> +    /* Temporary storage for chassis references while computing HA 
>>>> groups. */
>>>> +    struct hmapx tmp_ha_chassis;
>>>>   };
>>>>   static struct ovn_datapath *
>>>> @@ -8708,6 +8710,7 @@ build_lrouter_groups(struct hmap *lr_ports, 
>>>> struct ovs_list *lr_list)
>>>>               od->lr_group->router_dps[0] = od;
>>>>               od->lr_group->n_router_dps = 1;
>>>>               sset_init(&od->lr_group->ha_chassis_groups);
>>>> +            hmapx_init(&od->lr_group->tmp_ha_chassis);
>>>>               build_lrouter_groups__(lr_ports, od);
>>>>           }
>>>>       }
>>>> @@ -17399,6 +17402,7 @@ destroy_datapaths_and_ports(struct 
>>>> ovn_datapaths *ls_datapaths,
>>>>               free(lr_group->router_dps);
>>>>               sset_destroy(&lr_group->ha_chassis_groups);
>>>> +            hmapx_destroy(&lr_group->tmp_ha_chassis);
>>>>               free(lr_group);
>>>>           }
>>>>       }
>>>> @@ -17671,38 +17675,27 @@ ovnnb_db_run(struct northd_input *input_data,
>>>>       smap_destroy(&options);
>>>>   }
>>>> -/* Stores the list of chassis which references an ha_chassis_group.
>>>> +/* Stores the set of chassis which references an ha_chassis_group.
>>>>    */
>>>>   struct ha_ref_chassis_info {
>>>>       const struct sbrec_ha_chassis_group *ha_chassis_group;
>>>> -    struct sbrec_chassis **ref_chassis;
>>>> -    size_t n_ref_chassis;
>>>> -    size_t free_slots;
>>>> +    struct hmapx ref_chassis;
>>>>   };
>>>>   static void
>>>>   add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info,
>>>> -                           const struct sbrec_chassis *chassis)
>>>> +                           const struct hmapx *chassis)
>>>>   {
>>>> -    for (size_t j = 0; j < ref_ch_info->n_ref_chassis; j++) {
>>>> -        if (ref_ch_info->ref_chassis[j] == chassis) {
>>>> -           return;
>>>> -        }
>>>> -    }
>>>> +    if (!hmapx_count(&ref_ch_info->ref_chassis)) {
>>>> +        hmapx_destroy(&ref_ch_info->ref_chassis);
>>>> +        hmapx_clone(&ref_ch_info->ref_chassis, chassis);
>>>> +    } else {
>>>> +        struct hmapx_node *node;
>>>> -    /* Allocate space for 3 chassis at a time. */
>>>> -    if (!ref_ch_info->free_slots) {
>>>> -        ref_ch_info->ref_chassis =
>>>> -            xrealloc(ref_ch_info->ref_chassis,
>>>> -                     sizeof *ref_ch_info->ref_chassis *
>>>> -                     (ref_ch_info->n_ref_chassis + 3));
>>>> -        ref_ch_info->free_slots = 3;
>>>> +        HMAPX_FOR_EACH (node, chassis) {
>>>> +            hmapx_add(&ref_ch_info->ref_chassis, node->data);
>>>> +        }
>>>>       }
>>>> -
>>>> -    ref_ch_info->ref_chassis[ref_ch_info->n_ref_chassis] =
>>>> -        CONST_CAST(struct sbrec_chassis *, chassis);
>>>> -    ref_ch_info->n_ref_chassis++;
>>>> -    ref_ch_info->free_slots--;
>>>>   }
>>>>   struct ha_chassis_group_node {
>>>> @@ -17731,9 +17724,20 @@ update_sb_ha_group_ref_chassis(
>>>>       struct shash_node *node;
>>>>       SHASH_FOR_EACH_SAFE (node, ha_ref_chassis_map) {
>>>>           struct ha_ref_chassis_info *ha_ref_info = node->data;
>>>> +        size_t n = hmapx_count(&ha_ref_info->ref_chassis);
>>>> +        struct sbrec_chassis **ref_chassis;
>>>> +        struct hmapx_node *chassis_node;
>>>> +
>>>> +        ref_chassis = xmalloc(n * sizeof *ref_chassis);
>>>> +
>>>> +        n = 0;
>>>> +        HMAPX_FOR_EACH (chassis_node, &ha_ref_info->ref_chassis) {
>>>> +            ref_chassis[n++] = chassis_node->data;
>>>> +        }
>>>> +
>>>>           
>>>> sbrec_ha_chassis_group_set_ref_chassis(ha_ref_info->ha_chassis_group,
>>>> -                                               ha_ref_info->ref_chassis,
>>>> -                                               
>>>> ha_ref_info->n_ref_chassis);
>>>> +                                               ref_chassis, n);
>>>> +        free(ref_chassis);
>>>>           /* Remove the updated group from the set. */
>>>>           HMAP_FOR_EACH_WITH_HASH (ha_ch_grp_node, hmap_node,
>>>> @@ -17745,7 +17749,7 @@ update_sb_ha_group_ref_chassis(
>>>>                   break;
>>>>               }
>>>>           }
>>>> -        free(ha_ref_info->ref_chassis);
>>>> +        hmapx_destroy(&ha_ref_info->ref_chassis);
>>>>           free(ha_ref_info);
>>>>           shash_delete(ha_ref_chassis_map, node);
>>>>       }
>>>> @@ -17782,10 +17786,9 @@ update_sb_ha_group_ref_chassis(
>>>>    *  - 'ref_chassis' of hagrp1.
>>>>    */
>>>>   static void
>>>> -build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index 
>>>> *ha_ch_grp_by_name,
>>>> -                                   const struct sbrec_port_binding *sb,
>>>> -                                   struct ovn_port *op,
>>>> -                                   struct shash *ha_ref_chassis_map)
>>>> +collect_lb_groups_for_ha_chassis_groups(const struct 
>>>> sbrec_port_binding *sb,
>>>> +                                        struct ovn_port *op,
>>>> +                                        struct hmapx *lr_groups)
>>>>   {
>>>>       struct lrouter_group *lr_group = NULL;
>>>>       for (size_t i = 0; i < op->od->n_router_ports; i++) {
>>>> @@ -17804,18 +17807,39 @@ build_ha_chassis_group_ref_chassis(struct 
>>>> ovsdb_idl_index *ha_ch_grp_by_name,
>>>>           return;
>>>>       }
>>>> -    const char *ha_group_name;
>>>> -    SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) {
>>>> -        const struct sbrec_ha_chassis_group *sb_ha_chassis_grp;
>>>> -        sb_ha_chassis_grp = ha_chassis_group_lookup_by_name(
>>>> -            ha_ch_grp_by_name, ha_group_name);
>>>> +    hmapx_add(lr_groups, lr_group);
>>>> +    hmapx_add(&lr_group->tmp_ha_chassis, sb->chassis);
>>>> +}
>>>> +
>>>> +static void
>>>> +build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index 
>>>> *ha_ch_grp_by_name,
>>>> +                                   struct hmapx *lr_groups,
>>>> +                                   struct shash *ha_ref_chassis_map)
>>>> +{
>>>> +    struct hmapx_node *node;
>>>> +
>>>> +    HMAPX_FOR_EACH (node, lr_groups) {
>>>> +        struct lrouter_group *lr_group = node->data;
>>>> +        const char *ha_group_name;
>>>> +
>>>> +        SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) {
>>>> +            const struct sbrec_ha_chassis_group *sb_ha_chassis_grp;
>>>> +
>>>> +            sb_ha_chassis_grp = ha_chassis_group_lookup_by_name(
>>>> +                                    ha_ch_grp_by_name, ha_group_name);
>>>> +            if (!sb_ha_chassis_grp) {
>>>> +                continue;
>>>> +            }
>>>> -        if (sb_ha_chassis_grp) {
>>>>               struct ha_ref_chassis_info *ref_ch_info =
>>>> -            shash_find_data(ha_ref_chassis_map, 
>>>> sb_ha_chassis_grp->name);
>>>> +                shash_find_data(ha_ref_chassis_map, 
>>>> sb_ha_chassis_grp->name);
>>>>               ovs_assert(ref_ch_info);
>>>> -            add_to_ha_ref_chassis_info(ref_ch_info, sb->chassis);
>>>> +
>>>> +            add_to_ha_ref_chassis_info(ref_ch_info, 
>>>> &lr_group->tmp_ha_chassis);
>>>>           }
>>>> +
>>>> +        hmapx_destroy(&lr_group->tmp_ha_chassis);
>>>> +        hmapx_init(&lr_group->tmp_ha_chassis);
>>>>       }
>>>>   }
>>>> @@ -17830,15 +17854,19 @@ handle_port_binding_changes(struct 
>>>> ovsdb_idl_txn *ovnsb_txn,
>>>>                   struct hmap *ls_ports,
>>>>                   struct shash *ha_ref_chassis_map)
>>>>   {
>>>> +    struct hmapx lr_groups = HMAPX_INITIALIZER(&lr_groups);
>>>>       const struct sbrec_port_binding *sb;
>>>>       bool build_ha_chassis_ref = false;
>>>> +
>>>>       if (ovnsb_txn) {
>>>>           const struct sbrec_ha_chassis_group *ha_ch_grp;
>>>>           SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_ch_grp, 
>>>> sb_ha_ch_grp_table) {
>>>>               if (ha_ch_grp->n_ha_chassis > 1) {
>>>> -                struct ha_ref_chassis_info *ref_ch_info =
>>>> -                    xzalloc(sizeof *ref_ch_info);
>>>> +                struct ha_ref_chassis_info *ref_ch_info;
>>>> +
>>>> +                ref_ch_info = xzalloc(sizeof *ref_ch_info);
>>>>                   ref_ch_info->ha_chassis_group = ha_ch_grp;
>>>> +                hmapx_init(&ref_ch_info->ref_chassis);
>>>>                   build_ha_chassis_ref = true;
>>>>                   shash_add(ha_ref_chassis_map, ha_ch_grp->name, 
>>>> ref_ch_info);
>>>>               }
>>>> @@ -17879,12 +17907,16 @@ handle_port_binding_changes(struct 
>>>> ovsdb_idl_txn *ovnsb_txn,
>>>>           }
>>>>           if (build_ha_chassis_ref && ovnsb_txn && sb->chassis) {
>>>> -            /* Check and add the chassis which has claimed this 'sb'
>>>> -             * to the ha chassis group's ref_chassis if required. */
>>>> -            build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, 
>>>> sb, op,
>>>> -                                               ha_ref_chassis_map);
>>>> +            /* Check and collect the chassis which has claimed this 'sb'
>>>> +             * in relation to LR groups. */
>>>> +            collect_lb_groups_for_ha_chassis_groups(sb, op, &lr_groups);
>>>>           }
>>>>       }
>>>> +
>>>> +    /* Update ha chassis group's ref_chassis if required. */
>>>> +    build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, &lr_groups,
>>>> +                                       ha_ref_chassis_map);
>>>> +    hmapx_destroy(&lr_groups);
>>>>   }
>>>>   /* Handle a fairly small set of changes in the southbound database. */
>>>
>>
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to