On 8/25/23 18:52, Mark Michelson wrote:
> Hi Ilya,
> 
> I merged this change into main and branch-23.06. The conflicts were easy 
> to resolve in branch-23.06, but when attempting to merge to 23.03, I was 
> not as comfortable trying to resolve the conflicts. Could you please 
> post a version of the patch that applies to 23.03 please?

Sure, I'll do that.  If not today, then on Monday.

Best regards, Ilya Maximets.

> 
> Thanks,
> Mark Michelson
> 
> On 8/24/23 15:02, Mark Michelson wrote:
>> Thanks Ilya,
>>
>> Acked-by: Mark Michelson <[email protected]>
>>
>> The topic of backporting this patch was brought up in the OVN 
>> development IRC meeting today, and it was universally agreed there that 
>> this performance issue is so bad that we should backport this fix when 
>> merging.
>>
>> On 8/23/23 17:41, Ilya Maximets wrote:
>>> Current implementation of 'ref_chassis' recompute is horribly slow.
>>> For each port binding it finds LR group, then it adds a chassis of the
>>> current port binding to each HA chassis group of this LR group.
>>>
>>> The number of ports is much greater than number of chassis.  And the
>>> number of distinct LR groups is even smaller than a number of chassis
>>> in a typical setup.  Chances are, the setup only has one LR group.
>>> So, northd today is trying to add the same chassis to the same
>>> HA chassis group over and over again.
>>>
>>> In an OpenStack setup with ~20 chassis, 40K ports and 5K routers we
>>> end up with 33+ million calls to add_to_ha_ref_chassis_info() that
>>> also performs a linear search for duplicates leading to about 270
>>> million operations, while we only have 5K Sb HA chassis groups with
>>> 15 ref_chassis in each.  I.e. only 5000 * 15 = 75.000 operations out
>>> of 270 million are actually useful.
>>>
>>> In practice that leads to 80 second recompute:
>>>
>>>    inc_proc_eng|INFO|node: sync_from_sb, recompute (forced) took 80074ms
>>>
>>> Fix that by accumulating unique chassis per unique LR groups first
>>> and then updating ref_chassis with these unique values avoiding any
>>> unnecessary calls.  Result is ~1000x performance improvement:
>>>
>>>    inc_proc_eng|DBG|node: sync_from_sb, recompute (forced) took 75ms
>>>
>>> Considering this as a performance bug, because it makes OVN practically
>>> unusable in certain (default?) OpenStack configurations at scale.
>>>
>>> Fixes: b82d351976a1 ("ovn: Add generic HA chassis group")
>>> Reported-at: 
>>> https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052614.html
>>> Reported-by: Roberto Bartzen Acosta <[email protected]>
>>> Signed-off-by: Ilya Maximets <[email protected]>
>>> ---
>>>   northd/northd.c | 118 ++++++++++++++++++++++++++++++------------------
>>>   1 file changed, 75 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index 0a749931e..f5fb5e83a 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -799,6 +799,8 @@ struct lrouter_group {
>>>       int n_router_dps;
>>>       /* Set of ha_chassis_groups which are associated with the router 
>>> dps. */
>>>       struct sset ha_chassis_groups;
>>> +    /* Temporary storage for chassis references while computing HA 
>>> groups. */
>>> +    struct hmapx tmp_ha_chassis;
>>>   };
>>>   static struct ovn_datapath *
>>> @@ -8708,6 +8710,7 @@ build_lrouter_groups(struct hmap *lr_ports, 
>>> struct ovs_list *lr_list)
>>>               od->lr_group->router_dps[0] = od;
>>>               od->lr_group->n_router_dps = 1;
>>>               sset_init(&od->lr_group->ha_chassis_groups);
>>> +            hmapx_init(&od->lr_group->tmp_ha_chassis);
>>>               build_lrouter_groups__(lr_ports, od);
>>>           }
>>>       }
>>> @@ -17399,6 +17402,7 @@ destroy_datapaths_and_ports(struct 
>>> ovn_datapaths *ls_datapaths,
>>>               free(lr_group->router_dps);
>>>               sset_destroy(&lr_group->ha_chassis_groups);
>>> +            hmapx_destroy(&lr_group->tmp_ha_chassis);
>>>               free(lr_group);
>>>           }
>>>       }
>>> @@ -17671,38 +17675,27 @@ ovnnb_db_run(struct northd_input *input_data,
>>>       smap_destroy(&options);
>>>   }
>>> -/* Stores the list of chassis which references an ha_chassis_group.
>>> +/* Stores the set of chassis which references an ha_chassis_group.
>>>    */
>>>   struct ha_ref_chassis_info {
>>>       const struct sbrec_ha_chassis_group *ha_chassis_group;
>>> -    struct sbrec_chassis **ref_chassis;
>>> -    size_t n_ref_chassis;
>>> -    size_t free_slots;
>>> +    struct hmapx ref_chassis;
>>>   };
>>>   static void
>>>   add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info,
>>> -                           const struct sbrec_chassis *chassis)
>>> +                           const struct hmapx *chassis)
>>>   {
>>> -    for (size_t j = 0; j < ref_ch_info->n_ref_chassis; j++) {
>>> -        if (ref_ch_info->ref_chassis[j] == chassis) {
>>> -           return;
>>> -        }
>>> -    }
>>> +    if (!hmapx_count(&ref_ch_info->ref_chassis)) {
>>> +        hmapx_destroy(&ref_ch_info->ref_chassis);
>>> +        hmapx_clone(&ref_ch_info->ref_chassis, chassis);
>>> +    } else {
>>> +        struct hmapx_node *node;
>>> -    /* Allocate space for 3 chassis at a time. */
>>> -    if (!ref_ch_info->free_slots) {
>>> -        ref_ch_info->ref_chassis =
>>> -            xrealloc(ref_ch_info->ref_chassis,
>>> -                     sizeof *ref_ch_info->ref_chassis *
>>> -                     (ref_ch_info->n_ref_chassis + 3));
>>> -        ref_ch_info->free_slots = 3;
>>> +        HMAPX_FOR_EACH (node, chassis) {
>>> +            hmapx_add(&ref_ch_info->ref_chassis, node->data);
>>> +        }
>>>       }
>>> -
>>> -    ref_ch_info->ref_chassis[ref_ch_info->n_ref_chassis] =
>>> -        CONST_CAST(struct sbrec_chassis *, chassis);
>>> -    ref_ch_info->n_ref_chassis++;
>>> -    ref_ch_info->free_slots--;
>>>   }
>>>   struct ha_chassis_group_node {
>>> @@ -17731,9 +17724,20 @@ update_sb_ha_group_ref_chassis(
>>>       struct shash_node *node;
>>>       SHASH_FOR_EACH_SAFE (node, ha_ref_chassis_map) {
>>>           struct ha_ref_chassis_info *ha_ref_info = node->data;
>>> +        size_t n = hmapx_count(&ha_ref_info->ref_chassis);
>>> +        struct sbrec_chassis **ref_chassis;
>>> +        struct hmapx_node *chassis_node;
>>> +
>>> +        ref_chassis = xmalloc(n * sizeof *ref_chassis);
>>> +
>>> +        n = 0;
>>> +        HMAPX_FOR_EACH (chassis_node, &ha_ref_info->ref_chassis) {
>>> +            ref_chassis[n++] = chassis_node->data;
>>> +        }
>>> +
>>>           
>>> sbrec_ha_chassis_group_set_ref_chassis(ha_ref_info->ha_chassis_group,
>>> -                                               ha_ref_info->ref_chassis,
>>> -                                               
>>> ha_ref_info->n_ref_chassis);
>>> +                                               ref_chassis, n);
>>> +        free(ref_chassis);
>>>           /* Remove the updated group from the set. */
>>>           HMAP_FOR_EACH_WITH_HASH (ha_ch_grp_node, hmap_node,
>>> @@ -17745,7 +17749,7 @@ update_sb_ha_group_ref_chassis(
>>>                   break;
>>>               }
>>>           }
>>> -        free(ha_ref_info->ref_chassis);
>>> +        hmapx_destroy(&ha_ref_info->ref_chassis);
>>>           free(ha_ref_info);
>>>           shash_delete(ha_ref_chassis_map, node);
>>>       }
>>> @@ -17782,10 +17786,9 @@ update_sb_ha_group_ref_chassis(
>>>    *  - 'ref_chassis' of hagrp1.
>>>    */
>>>   static void
>>> -build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index 
>>> *ha_ch_grp_by_name,
>>> -                                   const struct sbrec_port_binding *sb,
>>> -                                   struct ovn_port *op,
>>> -                                   struct shash *ha_ref_chassis_map)
>>> +collect_lb_groups_for_ha_chassis_groups(const struct 
>>> sbrec_port_binding *sb,
>>> +                                        struct ovn_port *op,
>>> +                                        struct hmapx *lr_groups)
>>>   {
>>>       struct lrouter_group *lr_group = NULL;
>>>       for (size_t i = 0; i < op->od->n_router_ports; i++) {
>>> @@ -17804,18 +17807,39 @@ build_ha_chassis_group_ref_chassis(struct 
>>> ovsdb_idl_index *ha_ch_grp_by_name,
>>>           return;
>>>       }
>>> -    const char *ha_group_name;
>>> -    SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) {
>>> -        const struct sbrec_ha_chassis_group *sb_ha_chassis_grp;
>>> -        sb_ha_chassis_grp = ha_chassis_group_lookup_by_name(
>>> -            ha_ch_grp_by_name, ha_group_name);
>>> +    hmapx_add(lr_groups, lr_group);
>>> +    hmapx_add(&lr_group->tmp_ha_chassis, sb->chassis);
>>> +}
>>> +
>>> +static void
>>> +build_ha_chassis_group_ref_chassis(struct ovsdb_idl_index 
>>> *ha_ch_grp_by_name,
>>> +                                   struct hmapx *lr_groups,
>>> +                                   struct shash *ha_ref_chassis_map)
>>> +{
>>> +    struct hmapx_node *node;
>>> +
>>> +    HMAPX_FOR_EACH (node, lr_groups) {
>>> +        struct lrouter_group *lr_group = node->data;
>>> +        const char *ha_group_name;
>>> +
>>> +        SSET_FOR_EACH (ha_group_name, &lr_group->ha_chassis_groups) {
>>> +            const struct sbrec_ha_chassis_group *sb_ha_chassis_grp;
>>> +
>>> +            sb_ha_chassis_grp = ha_chassis_group_lookup_by_name(
>>> +                                    ha_ch_grp_by_name, ha_group_name);
>>> +            if (!sb_ha_chassis_grp) {
>>> +                continue;
>>> +            }
>>> -        if (sb_ha_chassis_grp) {
>>>               struct ha_ref_chassis_info *ref_ch_info =
>>> -            shash_find_data(ha_ref_chassis_map, 
>>> sb_ha_chassis_grp->name);
>>> +                shash_find_data(ha_ref_chassis_map, 
>>> sb_ha_chassis_grp->name);
>>>               ovs_assert(ref_ch_info);
>>> -            add_to_ha_ref_chassis_info(ref_ch_info, sb->chassis);
>>> +
>>> +            add_to_ha_ref_chassis_info(ref_ch_info, 
>>> &lr_group->tmp_ha_chassis);
>>>           }
>>> +
>>> +        hmapx_destroy(&lr_group->tmp_ha_chassis);
>>> +        hmapx_init(&lr_group->tmp_ha_chassis);
>>>       }
>>>   }
>>> @@ -17830,15 +17854,19 @@ handle_port_binding_changes(struct 
>>> ovsdb_idl_txn *ovnsb_txn,
>>>                   struct hmap *ls_ports,
>>>                   struct shash *ha_ref_chassis_map)
>>>   {
>>> +    struct hmapx lr_groups = HMAPX_INITIALIZER(&lr_groups);
>>>       const struct sbrec_port_binding *sb;
>>>       bool build_ha_chassis_ref = false;
>>> +
>>>       if (ovnsb_txn) {
>>>           const struct sbrec_ha_chassis_group *ha_ch_grp;
>>>           SBREC_HA_CHASSIS_GROUP_TABLE_FOR_EACH (ha_ch_grp, 
>>> sb_ha_ch_grp_table) {
>>>               if (ha_ch_grp->n_ha_chassis > 1) {
>>> -                struct ha_ref_chassis_info *ref_ch_info =
>>> -                    xzalloc(sizeof *ref_ch_info);
>>> +                struct ha_ref_chassis_info *ref_ch_info;
>>> +
>>> +                ref_ch_info = xzalloc(sizeof *ref_ch_info);
>>>                   ref_ch_info->ha_chassis_group = ha_ch_grp;
>>> +                hmapx_init(&ref_ch_info->ref_chassis);
>>>                   build_ha_chassis_ref = true;
>>>                   shash_add(ha_ref_chassis_map, ha_ch_grp->name, 
>>> ref_ch_info);
>>>               }
>>> @@ -17879,12 +17907,16 @@ handle_port_binding_changes(struct 
>>> ovsdb_idl_txn *ovnsb_txn,
>>>           }
>>>           if (build_ha_chassis_ref && ovnsb_txn && sb->chassis) {
>>> -            /* Check and add the chassis which has claimed this 'sb'
>>> -             * to the ha chassis group's ref_chassis if required. */
>>> -            build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, 
>>> sb, op,
>>> -                                               ha_ref_chassis_map);
>>> +            /* Check and collect the chassis which has claimed this 'sb'
>>> +             * in relation to LR groups. */
>>> +            collect_lb_groups_for_ha_chassis_groups(sb, op, &lr_groups);
>>>           }
>>>       }
>>> +
>>> +    /* Update ha chassis group's ref_chassis if required. */
>>> +    build_ha_chassis_group_ref_chassis(sb_ha_ch_grp_by_name, &lr_groups,
>>> +                                       ha_ref_chassis_map);
>>> +    hmapx_destroy(&lr_groups);
>>>   }
>>>   /* Handle a fairly small set of changes in the southbound database. */
>>
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to