On 7/18/23 15:17, Ilya Maximets wrote:
> On 7/18/23 05:32, Han Zhou wrote:
>>
>>
>> On Fri, Jul 7, 2023 at 1:55 PM <[email protected] <mailto:[email protected]>> 
>> wrote:
>>>
>>> From: Numan Siddique <[email protected] <mailto:[email protected]>>
>>>
>>> For a given load balancer group 'A', northd engine data maintains
>>> a bitmap of datapaths associated with this lb group.  So when lb group 'A'
>>> gets associated with a logical switch 's1', the bitmap index of 's1' is set
>>> in its bitmap.
>>>
>>> In order to handle the load balancer group changes incrementally for a
>>> logical switch, we need to set and clear the bitmap bits accordingly.
>>> And this patch does it.
>>>
>>> Signed-off-by: Numan Siddique <[email protected] <mailto:[email protected]>>
>>> ---
>>>  lib/lb.c            |  45 +++++++++++++++---
>>>  lib/lb.h            |  33 ++++++--------
>>>  northd/northd.c     | 109 ++++++++++++++++++++++++++++++++++++++------
>>>  tests/ovn-northd.at <http://ovn-northd.at> |   6 +--
>>>  4 files changed, 150 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/lib/lb.c b/lib/lb.c
>>> index b2b6473c1d..a0426cc42e 100644
>>> --- a/lib/lb.c
>>> +++ b/lib/lb.c
>>> @@ -1100,7 +1100,7 @@ ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths 
>>> *lb_dps, size_t n,
>>>
>>>  void
>>>  ovn_lb_datapaths_remove_ls(struct ovn_lb_datapaths *lb_dps, size_t n,
>>> -                        struct ovn_datapath **ods)
>>> +                           struct ovn_datapath **ods)
>>>  {
>>>      for (size_t i = 0; i < n; i++) {
>>>          if (bitmap_is_set(lb_dps->nb_ls_map, ods[i]->index)) {
>>> @@ -1112,14 +1112,14 @@ ovn_lb_datapaths_remove_ls(struct ovn_lb_datapaths 
>>> *lb_dps, size_t n,
>>>
>>>  struct ovn_lb_group_datapaths *
>>>  ovn_lb_group_datapaths_create(const struct ovn_lb_group *lb_group,
>>> -                              size_t max_ls_datapaths,
>>> -                              size_t max_lr_datapaths)
>>> +                              size_t n_ls_datapaths,
>>> +                              size_t n_lr_datapaths)
>>>  {
>>>      struct ovn_lb_group_datapaths *lb_group_dps =
>>>          xzalloc(sizeof *lb_group_dps);
>>>      lb_group_dps->lb_group = lb_group;
>>> -    lb_group_dps->ls = xmalloc(max_ls_datapaths * sizeof 
>>> *lb_group_dps->ls);
>>> -    lb_group_dps->lr = xmalloc(max_lr_datapaths * sizeof 
>>> *lb_group_dps->lr);
>>> +    lb_group_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths);
>>> +    lb_group_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths);
>>>
>>>      return lb_group_dps;
>>>  }
>>> @@ -1127,8 +1127,8 @@ ovn_lb_group_datapaths_create(const struct 
>>> ovn_lb_group *lb_group,
>>>  void
>>>  ovn_lb_group_datapaths_destroy(struct ovn_lb_group_datapaths *lb_group_dps)
>>>  {
>>> -    free(lb_group_dps->ls);
>>> -    free(lb_group_dps->lr);
>>> +    bitmap_free(lb_group_dps->nb_lr_map);
>>> +    bitmap_free(lb_group_dps->nb_ls_map);
>>>      free(lb_group_dps);
>>>  }
>>>
>>> @@ -1146,3 +1146,34 @@ ovn_lb_group_datapaths_find(const struct hmap 
>>> *lb_group_dps_map,
>>>      }
>>>      return NULL;
>>>  }
>>> +
>>> +void
>>> +ovn_lb_group_datapaths_add_ls(struct ovn_lb_group_datapaths *lbg_dps, 
>>> size_t n,
>>> +                              struct ovn_datapath **ods)
>>> +{
>>> +    for (size_t i = 0; i < n; i++) {
>>> +        if (!bitmap_is_set(lbg_dps->nb_ls_map, ods[i]->index)) {
>>> +            bitmap_set1(lbg_dps->nb_ls_map, ods[i]->index);
>>> +            lbg_dps->n_nb_ls++;
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +void
>>> +ovn_lb_group_datapaths_remove_ls(struct ovn_lb_group_datapaths *lbg_dps,
>>> +                                 size_t n, struct ovn_datapath **ods)
>>> +{
>>> +    for (size_t i = 0; i < n; i++) {
>>> +        if (bitmap_is_set(lbg_dps->nb_ls_map, ods[i]->index)) {
>>> +            bitmap_set0(lbg_dps->nb_ls_map, ods[i]->index);
>>> +            lbg_dps->n_nb_ls--;
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +void
>>> +ovn_lb_group_datapaths_add_lr(struct ovn_lb_group_datapaths *lbg_dps,
>>> +                              struct ovn_datapath *lr)
>>> +{
>>> +    bitmap_set1(lbg_dps->nb_lr_map, lr->index);
>>> +}
>>> diff --git a/lib/lb.h b/lib/lb.h
>>> index ac33333a7f..08723e8ef3 100644
>>> --- a/lib/lb.h
>>> +++ b/lib/lb.h
>>> @@ -19,6 +19,7 @@
>>>
>>>  #include <sys/types.h>
>>>  #include <netinet/in.h>
>>> +#include "lib/bitmap.h"
>>>  #include "lib/smap.h"
>>>  #include "openvswitch/hmap.h"
>>>  #include "ovn-util.h"
>>> @@ -179,6 +180,9 @@ void ovn_lb_datapaths_add_lr(struct ovn_lb_datapaths *, 
>>> size_t n,
>>>                               struct ovn_datapath **);
>>>  void ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths *, size_t n,
>>>                               struct ovn_datapath **);
>>> +void ovn_lb_datapaths_set_ls(struct ovn_lb_datapaths *, size_t n,
>>> +                             struct ovn_datapath **);
>>> +
>>>  void ovn_lb_datapaths_remove_ls(struct ovn_lb_datapaths *, size_t n,
>>>                                  struct ovn_datapath **);
>>>
>>> @@ -188,10 +192,11 @@ struct ovn_lb_group_datapaths {
>>>      const struct ovn_lb_group *lb_group;
>>>
>>>      /* Datapaths to which 'lb_group' is applied. */
>>> -    size_t n_ls;
>>> -    struct ovn_datapath **ls;
>>> -    size_t n_lr;
>>> -    struct ovn_datapath **lr;
>>> +    size_t n_nb_ls;
>>> +    unsigned long *nb_ls_map;
>>> +
>>> +    size_t n_nb_lr;
>>> +    unsigned long *nb_lr_map;
>>>  };
>>>
>>>  struct ovn_lb_group_datapaths *ovn_lb_group_datapaths_create(
>>> @@ -202,21 +207,13 @@ void ovn_lb_group_datapaths_destroy(struct 
>>> ovn_lb_group_datapaths *);
>>>  struct ovn_lb_group_datapaths *ovn_lb_group_datapaths_find(
>>>      const struct hmap *lb_group_dps, const struct uuid *);
>>>
>>> -static inline void
>>> -ovn_lb_group_datapaths_add_ls(struct ovn_lb_group_datapaths *lbg_dps, 
>>> size_t n,
>>> -                               struct ovn_datapath **ods)
>>> -{
>>> -    memcpy(&lbg_dps->ls[lbg_dps->n_ls], ods, n * sizeof *ods);
>>> -    lbg_dps->n_ls += n;
>>> -}
>>> -
>>
>> This was O(1), but this patch changed it to O(n). Maybe we need some scale 
>> test data
>> to see the impact on recompute performance until we are confident that 
>> recompute
>> won't be triggered in most cases. I remember this "memcpy" approach was for 
>> a significant
>> optimization from Ilya. cc @Ilya Maximets <mailto:[email protected]> in 
>> case he has
>> more comments.
> 
> Thanks, Han, for pointing out.  It is hard to tell what the impact would be
> without testing as there are way too many factors, as usual with performance
> testing.  So, I fired up 500 node density-heavy test with ovn-heater.
> 
> Results are not great.  I see average long poll intervals on northd went up
> to 6.6 seconds from previous 2.6 seconds.  95% went up to 9.8 seconds with
> the patch set applied from 3.9 seconds on current main.
> 
> Here is an example iteration log during the test:
> 
> 2023-07-18T12:34:37.147Z|01264|inc_proc_eng|INFO|node: northd, handler for 
> input NB_logical_switch took 2479ms
> 2023-07-18T12:34:40.633Z|01265|inc_proc_eng|INFO|node: northd, handler for 
> input NB_logical_router took 3486ms
> 2023-07-18T12:34:41.976Z|01266|inc_proc_eng|INFO|node: lflow, recompute 
> (failed handler for input northd) took 962ms
> 2023-07-18T12:34:41.977Z|01267|timeval|WARN|Unreasonably long 7362ms poll 
> interval (6604ms user, 381ms system)
> 
> We can see that northd node is processed incrementally, but very slow.
> In comparison, an average iteration looks like this on the current main:
> 
> 2023-07-15T19:55:02.234Z|00456|inc_proc_eng|INFO|node: northd, recompute 
> (missing handler for input NB_load_balancer) took 1829ms
> 2023-07-15T19:55:03.237Z|00457|inc_proc_eng|INFO|node: lflow, recompute 
> (failed handler for input northd) took 979ms
> 2023-07-15T19:55:03.239Z|00458|timeval|WARN|Unreasonably long 2868ms poll 
> interval (2314ms user, 130ms system)
> 
> We can see that a full recompute of the northd node on current main is much
> faster than either of the I-P handlers.
> 
> perf somewhat points to the functions above to be hot spots, but the
> datapath lookup dominates the chart:
> 
>   29.39%  ovn-northd          [.] ovn_lb_datapaths_find
>    8.38%  ovn-northd          [.] ovn_lb_datapaths_add_lr
>    6.72%  ovn-northd          [.] ovn_lb_datapaths_remove_ls
>    6.40%  ovn-northd          [.] ovn_lb_datapaths_add_ls
>    6.19%  ovn-northd          [.] ovn_lb_datapaths_remove_lr
>    3.86%  ovn-northd          [.] northd_handle_lr_changes
> 
> The ovn-installed latency in the test went up from average 3.3 seconds to
> 6.4 seconds.
> 
> All in all, incremental processing implemented in this patch set appears to
> be 2-3 times slower than a full recompute on current main.

I also checked the recompute time now.  And it looks like a full recompute
after applying the patch set is about 2x slower than on a current main.
Interestingly enough, it means that with the patch set applied, the full
recompute is a bit faster than I-P iterations.

> 
> Best regards, Ilya Maximets
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to