On Wed, Dec 6, 2023 at 9:03 AM Dumitru Ceara <[email protected]> wrote: > > When multiple OVSDB updates have been received since the last northd run > it's possible that the IDL tracks changes for database entities that > were added _and also_ removed from the last time the northd processing > engin run. In some cases, those may appear as being simultaneously > "new" and "deleted". > > Currently, the only tables for which can be a problem are > NB.Load_Balancer and NB.Load_Balancer_Group. > > Skip these "transient records" to avoid adding soon to be deleted rows > to the tracked_lb_data->crupdated_lbs records. These are used to build > 'northd' I-P engine state in northd_handle_lb_data_changes(). > > We also avoid crashing if "unexpected" deletes are reported by the IDL. > This is likely due to a bug in the IDL [0] but it's easy to avoid on the > northd side. > > This commit also adds a test case which _might_ detect the issue when > run under valgrind. The test case can't always detect the problem > because a prerequisite for a Load Balancer to be "transient" is that the > IDL processes the update that removes it from the NB Load_Balancer table > and from the Load_Balancer_Group row that was referring to it in the > following order: Load_Balancer_Group table update first and then > Load_Balancer deletion. The order is controlled by the way > 'struct shash' hashes records (table names in this case) and that's > arch and/or compiler dependent. > > [0] https://issues.redhat.com/browse/FDP-193 > > Fixes: a24eed5cc6b4 ("northd: Add initial I-P for load balancer and load > balancer groups") > Reported-at: https://issues.redhat.com/browse/FDP-181 > Signed-off-by: Dumitru Ceara <[email protected]>
Thanks for fixing this issue. Acked-by: Numan Siddique <[email protected]> Please see below a couple of nits. > --- > northd/en-lb-data.c | 52 +++++++++++++++++++++++++++++---------------- > tests/ovn-macros.at | 10 +++++++++ > tests/ovn-northd.at | 37 ++++++++++++++++++++++++++++++++ > 3 files changed, 81 insertions(+), 18 deletions(-) > > diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c > index 250cec848b..d06f46a54b 100644 > --- a/northd/en-lb-data.c > +++ b/northd/en-lb-data.c > @@ -144,6 +144,12 @@ lb_data_load_balancer_handler(struct engine_node *node, > void *data) > > const struct nbrec_load_balancer *tracked_lb; > NBREC_LOAD_BALANCER_TABLE_FOR_EACH_TRACKED (tracked_lb, nb_lb_table) { > + /* "New" + "Deleted" is a no-op. */ > + if (nbrec_load_balancer_is_new(tracked_lb) > + && nbrec_load_balancer_is_deleted(tracked_lb)) { > + continue; > + } > + > struct ovn_northd_lb *lb; > if (nbrec_load_balancer_is_new(tracked_lb)) { > /* New load balancer. */ > @@ -153,19 +159,22 @@ lb_data_load_balancer_handler(struct engine_node *node, > void *data) > add_crupdated_lb_to_tracked_data(lb, trk_lb_data, > lb->health_checks); > trk_lb_data->has_routable_lb |= lb->routable; > - } else if (nbrec_load_balancer_is_deleted(tracked_lb)) { > - lb = ovn_northd_lb_find(&lb_data->lbs, > - &tracked_lb->header_.uuid); > - ovs_assert(lb); > + continue; > + } > + > + /* Protect against "spurious" deletes reported by the IDL. */ > + lb = ovn_northd_lb_find(&lb_data->lbs, &tracked_lb->header_.uuid); > + if (!lb) { > + continue; > + } > + > + if (nbrec_load_balancer_is_deleted(tracked_lb)) { > hmap_remove(&lb_data->lbs, &lb->hmap_node); > add_deleted_lb_to_tracked_data(lb, trk_lb_data, > lb->health_checks); > trk_lb_data->has_routable_lb |= lb->routable; > } else { > /* Load balancer updated. */ > - lb = ovn_northd_lb_find(&lb_data->lbs, > - &tracked_lb->header_.uuid); > - ovs_assert(lb); > bool health_checks = lb->health_checks; > struct sset old_ips_v4 = SSET_INITIALIZER(&old_ips_v4); > struct sset old_ips_v6 = SSET_INITIALIZER(&old_ips_v6); > @@ -217,6 +226,12 @@ lb_data_load_balancer_group_handler(struct engine_node > *node, void *data) > const struct nbrec_load_balancer_group *tracked_lb_group; > NBREC_LOAD_BALANCER_GROUP_TABLE_FOR_EACH_TRACKED (tracked_lb_group, > nb_lbg_table) { > + /* "New" + "Deleted" is a no-op. */ > + if (nbrec_load_balancer_group_is_new(tracked_lb_group) > + && nbrec_load_balancer_group_is_deleted(tracked_lb_group)) { > + continue; > + } > + > if (nbrec_load_balancer_group_is_new(tracked_lb_group)) { > struct ovn_lb_group *lb_group = > create_lb_group(tracked_lb_group, &lb_data->lbs, > @@ -228,21 +243,22 @@ lb_data_load_balancer_group_handler(struct engine_node > *node, void *data) > } > > trk_lb_data->has_routable_lb |= lb_group->has_routable_lb; > - } else if (nbrec_load_balancer_group_is_deleted(tracked_lb_group)) { > - struct ovn_lb_group *lb_group; > - lb_group = ovn_lb_group_find(&lb_data->lbgrps, > - &tracked_lb_group->header_.uuid); > - ovs_assert(lb_group); > + continue; > + } > + > + /* Protect against "spurious" deletes reported by the IDL. */ > + struct ovn_lb_group *lb_group; > + lb_group = ovn_lb_group_find(&lb_data->lbgrps, > + &tracked_lb_group->header_.uuid); > + if (!lb_group) { > + continue; > + } > + > + if (nbrec_load_balancer_group_is_deleted(tracked_lb_group)) { > hmap_remove(&lb_data->lbgrps, &lb_group->hmap_node); > add_deleted_lbgrp_to_tracked_data(lb_group, trk_lb_data); > trk_lb_data->has_routable_lb |= lb_group->has_routable_lb; > } else { > - > - struct ovn_lb_group *lb_group; > - lb_group = ovn_lb_group_find(&lb_data->lbgrps, > - &tracked_lb_group->header_.uuid); > - ovs_assert(lb_group); > - > /* Determine the lbs which are added or deleted for this > * lb group and add them to tracked data. > * Eg. If an lb group lbg1 before the update had [lb1, lb2, lb3] > diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at > index 1b693a22c3..94bdaff2c4 100644 > --- a/tests/ovn-macros.at > +++ b/tests/ovn-macros.at > @@ -891,6 +891,16 @@ start_scapy_server() { > && on_exit "test -e \"$pidfile\" && ovs-appctl -t $ctlfile exit" > } > > +sleep_northd() { > + echo Northd going to sleep > + AT_CHECK([kill -STOP $(cat northd/ovn-northd.pid)]) > +} > + > +wake_up_northd() { > + echo Northd waking up > + AT_CHECK([kill -CONT $(cat northd/ovn-northd.pid)]) > +} > + > sleep_sb() { > echo SB going to sleep > AT_CHECK([kill -STOP $(cat ovn-sb/ovsdb-server.pid)]) > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index e7910e83c6..b47b6c9286 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -10913,3 +10913,40 @@ CHECK_NO_CHANGE_AFTER_RECOMPUTE > > AT_CLEANUP > ]) > + > +OVN_FOR_EACH_NORTHD_NO_HV([ > +AT_SETUP([Load balancer incremental processing - batched updates]) > +ovn_start > + > +# Check the scenario when a LB is created and quickly deleted (northd > +# processes this in a single iteration). > + > +check ovn-nbctl ls-add sw0 > +lbg_uuid=$(ovn-nbctl --wait=sb create load_balancer_group name=lbg) > +check ovn-nbctl --wait=sb add logical_switch sw0 load_balancer_group > $lbg_uuid > + > +# Pause ovn-northd. > +sleep_northd > + > +check ovn-nbctl lb-add lb-temp 50.0.0.10:80 50.0.0.20:8080 > +lb_temp_uuid=$(fetch_column nb:Load_Balancer _uuid name=lb-temp) > +check ovn-nbctl add load_balancer_group $lbg_uuid load_balancer $lb_temp_uuid > +check ovn-nbctl lb-del lb-temp > + > +# Let ovn-northd process all the updates that happened since it went to > sleep. > +#wake_up_northd Why is this commented ? Looks like this needs to be cleaned up ? > + > +# Add a new load balancer to make sure northd still functions properly. > +check ovn-nbctl lb-add lb 50.0.0.10:80 50.0.0.30:8080 > +lb_uuid=$(fetch_column nb:Load_Balancer _uuid name=lb) > +check ovn-nbctl add load_balancer_group $lbg_uuid load_balancer $lb_uuid > + > +wake_up_northd > +check ovn-nbctl --wait=sb sync I think it would be good to call 'CHECK_NO_CHANGE_AFTER_RECOMPUTE' before cleanup just to be sure that I-P is fine. Thanks Numan > + > +AT_CHECK([as northd ovn-appctl -t NORTHD_TYPE status], [0], [dnl > +Status: active > +]) > + > +AT_CLEANUP > +]) > -- > 2.39.3 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
