On Tue, Jul 18, 2023 at 9:39 PM Ilya Maximets <[email protected]> wrote: > > On 7/18/23 15:17, Ilya Maximets wrote: > > On 7/18/23 05:32, Han Zhou wrote: > >> > >> > >> On Fri, Jul 7, 2023 at 1:55 PM <[email protected] <mailto:[email protected]>> > >> wrote: > >>> > >>> From: Numan Siddique <[email protected] <mailto:[email protected]>> > >>> > >>> For every a given load balancer group 'A', northd engine data maintains > >>> a bitmap of datapaths associated to this lb group. So when lb group 'A' > >>> gets associated to a logical switch 's1', the bitmap index of 's1' is set > >>> in its bitmap. > >>> > >>> In order to handle the load balancer group changes incrementally for a > >>> logical switch, we need to set and clear the bitmap bits accordingly. > >>> And this patch does it. > >>> > >>> Signed-off-by: Numan Siddique <[email protected] <mailto:[email protected]>> > >>> --- > >>> lib/lb.c | 45 +++++++++++++++--- > >>> lib/lb.h | 33 ++++++-------- > >>> northd/northd.c | 109 ++++++++++++++++++++++++++++++++++++++------ > >>> tests/ovn-northd.at <http://ovn-northd.at> | 6 +-- > >>> 4 files changed, 150 insertions(+), 43 deletions(-) > >>> > >>> diff --git a/lib/lb.c b/lib/lb.c > >>> index b2b6473c1d..a0426cc42e 100644 > >>> --- a/lib/lb.c > >>> +++ b/lib/lb.c > >>> @@ -1100,7 +1100,7 @@ ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths > >>> *lb_dps, size_t n, > >>> > >>> void > >>> ovn_lb_datapaths_remove_ls(struct ovn_lb_datapaths *lb_dps, size_t n, > >>> - struct ovn_datapath **ods) > >>> + struct ovn_datapath **ods) > >>> { > >>> for (size_t i = 0; i < n; i++) { > >>> if (bitmap_is_set(lb_dps->nb_ls_map, ods[i]->index)) { > >>> @@ -1112,14 +1112,14 @@ ovn_lb_datapaths_remove_ls(struct > >>> ovn_lb_datapaths *lb_dps, size_t n, > >>> > >>> struct ovn_lb_group_datapaths * > >>> ovn_lb_group_datapaths_create(const struct ovn_lb_group *lb_group, > >>> - size_t max_ls_datapaths, > >>> - size_t max_lr_datapaths) > >>> + size_t n_ls_datapaths, > >>> + size_t 
n_lr_datapaths) > >>> { > >>> struct ovn_lb_group_datapaths *lb_group_dps = > >>> xzalloc(sizeof *lb_group_dps); > >>> lb_group_dps->lb_group = lb_group; > >>> - lb_group_dps->ls = xmalloc(max_ls_datapaths * sizeof > >>> *lb_group_dps->ls); > >>> - lb_group_dps->lr = xmalloc(max_lr_datapaths * sizeof > >>> *lb_group_dps->lr); > >>> + lb_group_dps->nb_ls_map = bitmap_allocate(n_ls_datapaths); > >>> + lb_group_dps->nb_lr_map = bitmap_allocate(n_lr_datapaths); > >>> > >>> return lb_group_dps; > >>> } > >>> @@ -1127,8 +1127,8 @@ ovn_lb_group_datapaths_create(const struct > >>> ovn_lb_group *lb_group, > >>> void > >>> ovn_lb_group_datapaths_destroy(struct ovn_lb_group_datapaths > >>> *lb_group_dps) > >>> { > >>> - free(lb_group_dps->ls); > >>> - free(lb_group_dps->lr); > >>> + bitmap_free(lb_group_dps->nb_lr_map); > >>> + bitmap_free(lb_group_dps->nb_ls_map); > >>> free(lb_group_dps); > >>> } > >>> > >>> @@ -1146,3 +1146,34 @@ ovn_lb_group_datapaths_find(const struct hmap > >>> *lb_group_dps_map, > >>> } > >>> return NULL; > >>> } > >>> + > >>> +void > >>> +ovn_lb_group_datapaths_add_ls(struct ovn_lb_group_datapaths *lbg_dps, > >>> size_t n, > >>> + struct ovn_datapath **ods) > >>> +{ > >>> + for (size_t i = 0; i < n; i++) { > >>> + if (!bitmap_is_set(lbg_dps->nb_ls_map, ods[i]->index)) { > >>> + bitmap_set1(lbg_dps->nb_ls_map, ods[i]->index); > >>> + lbg_dps->n_nb_ls++; > >>> + } > >>> + } > >>> +} > >>> + > >>> +void > >>> +ovn_lb_group_datapaths_remove_ls(struct ovn_lb_group_datapaths *lbg_dps, > >>> + size_t n, struct ovn_datapath **ods) > >>> +{ > >>> + for (size_t i = 0; i < n; i++) { > >>> + if (bitmap_is_set(lbg_dps->nb_ls_map, ods[i]->index)) { > >>> + bitmap_set0(lbg_dps->nb_ls_map, ods[i]->index); > >>> + lbg_dps->n_nb_ls--; > >>> + } > >>> + } > >>> +} > >>> + > >>> +void > >>> +ovn_lb_group_datapaths_add_lr(struct ovn_lb_group_datapaths *lbg_dps, > >>> + struct ovn_datapath *lr) > >>> +{ > >>> + bitmap_set1(lbg_dps->nb_lr_map, lr->index); > >>> +} > >>> 
diff --git a/lib/lb.h b/lib/lb.h > >>> index ac33333a7f..08723e8ef3 100644 > >>> --- a/lib/lb.h > >>> +++ b/lib/lb.h > >>> @@ -19,6 +19,7 @@ > >>> > >>> #include <sys/types.h> > >>> #include <netinet/in.h> > >>> +#include "lib/bitmap.h" > >>> #include "lib/smap.h" > >>> #include "openvswitch/hmap.h" > >>> #include "ovn-util.h" > >>> @@ -179,6 +180,9 @@ void ovn_lb_datapaths_add_lr(struct ovn_lb_datapaths > >>> *, size_t n, > >>> struct ovn_datapath **); > >>> void ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths *, size_t n, > >>> struct ovn_datapath **); > >>> +void ovn_lb_datapaths_set_ls(struct ovn_lb_datapaths *, size_t n, > >>> + struct ovn_datapath **); > >>> + > >>> void ovn_lb_datapaths_remove_ls(struct ovn_lb_datapaths *, size_t n, > >>> struct ovn_datapath **); > >>> > >>> @@ -188,10 +192,11 @@ struct ovn_lb_group_datapaths { > >>> const struct ovn_lb_group *lb_group; > >>> > >>> /* Datapaths to which 'lb_group' is applied. */ > >>> - size_t n_ls; > >>> - struct ovn_datapath **ls; > >>> - size_t n_lr; > >>> - struct ovn_datapath **lr; > >>> + size_t n_nb_ls; > >>> + unsigned long *nb_ls_map; > >>> + > >>> + size_t n_nb_lr; > >>> + unsigned long *nb_lr_map; > >>> }; > >>> > >>> struct ovn_lb_group_datapaths *ovn_lb_group_datapaths_create( > >>> @@ -202,21 +207,13 @@ void ovn_lb_group_datapaths_destroy(struct > >>> ovn_lb_group_datapaths *); > >>> struct ovn_lb_group_datapaths *ovn_lb_group_datapaths_find( > >>> const struct hmap *lb_group_dps, const struct uuid *); > >>> > >>> -static inline void > >>> -ovn_lb_group_datapaths_add_ls(struct ovn_lb_group_datapaths *lbg_dps, > >>> size_t n, > >>> - struct ovn_datapath **ods) > >>> -{ > >>> - memcpy(&lbg_dps->ls[lbg_dps->n_ls], ods, n * sizeof *ods); > >>> - lbg_dps->n_ls += n; > >>> -} > >>> - > >> > >> This was O(1), but this patch changed it to O(n). 
Maybe we need some scale > >> test data > >> to see the impact of recompute performance until we are confident that > >> recompute > >> won't be triggered in most cases. I remember this "memcpy" approach was > >> for a significant > >> optimization from Ilya. cc @Ilya Maximets <mailto:[email protected]> in > >> case he has > >> more comments. > > > > Thanks, Han, for pointing out. It is hard to tell what the impact would be > > without testing as there are way too many factors, as usual with performance > > testing. So, I fired up 500 node density-heavy test with ovn-heater. > > > > Results are not great. I see average long poll intervals on northd went up > > to 6.6 seconds from previous 2.6 seconds. 95% went up to 9.8 seconds with > > the patch set applied from 3.9 seconds on current main. > > > > Here is an example iteration log during the test: > > > > 2023-07-18T12:34:37.147Z|01264|inc_proc_eng|INFO|node: northd, handler for > > input NB_logical_switch took 2479ms > > 2023-07-18T12:34:40.633Z|01265|inc_proc_eng|INFO|node: northd, handler for > > input NB_logical_router took 3486ms > > 2023-07-18T12:34:41.976Z|01266|inc_proc_eng|INFO|node: lflow, recompute > > (failed handler for input northd) took 962ms > > 2023-07-18T12:34:41.977Z|01267|timeval|WARN|Unreasonably long 7362ms poll > > interval (6604ms user, 381ms system) > > > > We can see that northd node is processed incrementally, but very slow.
> > In comparison, an average iteration looks like this on the current main: > > > > 2023-07-15T19:55:02.234Z|00456|inc_proc_eng|INFO|node: northd, recompute > > (missing handler for input NB_load_balancer) took 1829ms > > 2023-07-15T19:55:03.237Z|00457|inc_proc_eng|INFO|node: lflow, recompute > > (failed handler for input northd) took 979ms > > 2023-07-15T19:55:03.239Z|00458|timeval|WARN|Unreasonably long 2868ms poll > > interval (2314ms user, 130ms system) > > > > We can see that a full recompute of the northd node on current main is much > > faster than either of the I-P handlers. > > > > perf somewhat points to the functions above to be hot spots, but the > > datapath lookup dominates the chart: > > > > 29.39% ovn-northd [.] ovn_lb_datapaths_find > > 8.38% ovn-northd [.] ovn_lb_datapaths_add_lr > > 6.72% ovn-northd [.] ovn_lb_datapaths_remove_ls > > 6.40% ovn-northd [.] ovn_lb_datapaths_add_ls > > 6.19% ovn-northd [.] ovn_lb_datapaths_remove_lr > > 3.86% ovn-northd [.] northd_handle_lr_changes > > > > The ovn-installed latency in the test went up from average 3.3 seconds to > > 6.4 seconds. > > > > All in all, incremental processing implemented in this patch set appears to > > be 2-3 times slower than a full recompute on current main. > > I also checked the recompute time now. And it looks like a full recompute > after applying the patch set is about 2x slower than on a current main. > Interestingly enough, it means that with a patch set applied, the full > recompute is a bit faster than I-P iterations.
Oops. Thanks Han and Ilya for the comments and Ilya for running a scale test. I'll address all these and in the next version will also include the scale test results. Numan > > > > > Best regards, Ilya Maximets > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
