An issue was filed where CT zone 0 was assigned to both a logical router SNAT and to a logical port. CT zone 0 is typically "reserved" and not assigned by ovn-controller; however, since SNAT zones are configurable, it is possible for ovn-controller to assign this zone at the CMS's request. This accounts for how CT zone 0 can be assigned for SNAT. There was also a small bug in the incremental processing that could result in a logical port being assigned zone 0.
In the specific issue report, CT zones were restored from the OVSDB when ovn-controller started, and the conflicting CT zones were already present. ovn-controller dutifully loaded these zones up. But then there was nothing that would allow for the conflict to be resolved afterwards. It is unknown how these conflicts entered into the OVSDB in the first place. This change does not purport to prevent conflicts from entering the OVSDB. However, it does make the following changes that should further safeguard against unwanted behavior: * ct_zones_runtime_data_handler() now assigns zones starting at 1 instead of 0. This makes it use the same range as update_ct_zones(). * update_ct_zones() now keeps a new simap for zones that are assigned but not due to an SNAT zone request. This allows for us to guarantee that when there is a conflict between a previously auto-assigned CT zone and a newly-requested zone, we are guaranteed to remove the auto-assigned CT zone. * The removal of conflicting auto-assigned CT zones is now performed before dealing with the newly requested zone. This makes it so that if we load conflicting zones from the OVSDB or if there is some issue that results in conflicting zones being assigned, we will correct the issue. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2126406 Signed-off-by: Mark Michelson <[email protected]> --- controller/ovn-controller.c | 55 +++++++++-------- tests/ovn-controller.at | 116 ++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 28 deletions(-) diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 8895c7a2b..f8b1056e1 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -659,7 +659,8 @@ update_ct_zones(const struct shash *binding_lports, const char *user; struct sset all_users = SSET_INITIALIZER(&all_users); struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones); - unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)]; + unsigned long unreq_snat_zones_map[BITMAP_N_LONGS(MAX_CT_ZONES)]; + struct simap unreq_snat_zones = SIMAP_INITIALIZER(&unreq_snat_zones); struct shash_node *shash_node; SHASH_FOR_EACH (shash_node, binding_lports) { @@ -696,49 +697,46 @@ update_ct_zones(const struct shash *binding_lports, bitmap_set0(ct_zone_bitmap, ct_zone->data); simap_delete(ct_zones, ct_zone); } else if (!simap_find(&req_snat_zones, ct_zone->name)) { - bitmap_set1(unreq_snat_zones, ct_zone->data); + bitmap_set1(unreq_snat_zones_map, ct_zone->data); + simap_put(&unreq_snat_zones, ct_zone->name, ct_zone->data); } } /* Prioritize requested CT zones */ struct simap_node *snat_req_node; SIMAP_FOR_EACH (snat_req_node, &req_snat_zones) { - struct simap_node *node = simap_find(ct_zones, snat_req_node->name); - if (node) { - if (node->data == snat_req_node->data) { - /* No change to this request, so no action needed */ - continue; - } else { - /* Zone request has changed for this node. delete old entry */ - bitmap_set0(ct_zone_bitmap, node->data); - simap_delete(ct_zones, node); - } - } - /* Determine if someone already had this zone auto-assigned. * If so, then they need to give up their assignment since * that zone is being explicitly requested now. */ - if (bitmap_is_set(unreq_snat_zones, snat_req_node->data)) { - struct simap_node *dup; - SIMAP_FOR_EACH_SAFE (dup, ct_zones) { - if (dup != snat_req_node && dup->data == snat_req_node->data) { - simap_delete(ct_zones, dup); - break; + if (bitmap_is_set(unreq_snat_zones_map, snat_req_node->data)) { + struct simap_node *unreq_node; + SIMAP_FOR_EACH_SAFE (unreq_node, &unreq_snat_zones) { + if (unreq_node->data == snat_req_node->data) { + simap_find_and_delete(ct_zones, unreq_node->name); + simap_delete(&unreq_snat_zones, unreq_node); } } + /* Set this bit to 0 so that if multiple datapaths have requested * this zone, we don't needlessly double-detect this condition. */ - bitmap_set0(unreq_snat_zones, snat_req_node->data); + bitmap_set0(unreq_snat_zones_map, snat_req_node->data); } - add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED, - snat_req_node->data, true, - snat_req_node->name); - - bitmap_set1(ct_zone_bitmap, snat_req_node->data); - simap_put(ct_zones, snat_req_node->name, snat_req_node->data); + struct simap_node *node = simap_find(ct_zones, snat_req_node->name); + if (node) { + if (node->data != snat_req_node->data) { + /* Zone request has changed for this node. delete old entry and + * create new one*/ + add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED, + snat_req_node->data, true, + snat_req_node->name); + bitmap_set0(ct_zone_bitmap, node->data); + } + bitmap_set1(ct_zone_bitmap, snat_req_node->data); + node->data = snat_req_node->data; + } } /* xxx This is wasteful to assign a zone to each port--even if no @@ -756,6 +754,7 @@ update_ct_zones(const struct shash *binding_lports, } simap_destroy(&req_snat_zones); + simap_destroy(&unreq_snat_zones); sset_destroy(&all_users); } @@ -2178,7 +2177,7 @@ ct_zones_runtime_data_handler(struct engine_node *node, void *data) struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings; struct tracked_datapath *tdp; - int scan_start = 0; + int scan_start = 1; bool updated = false; diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index 3c3fb31c7..25d420936 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -2337,3 +2337,119 @@ done AT_CHECK([grep "deleted interface patch" hv1/ovs-vswitchd.log], [1], [ignore]) OVN_CLEANUP([hv1]) AT_CLEANUP + +AT_SETUP([ovn-controller - resolve CT zone conflicts from ovsdb]) + +ovn_start + +net_add n1 +sim_add hv1 +as hv1 +check ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + +get_zone_num () { + output=$1 + name=$2 + printf "$output" | grep $name | cut -d ' ' -f 2 +} + +check_ovsdb_zone() { + name=$1 + ct_zone=$2 + db_zone=$(ovs-vsctl get Bridge br-int external_ids:ct-zone-${name} | sed -e 's/^"//' -e 's/"$//') + test $ct_zone -eq $db_zone +} + +check ovs-vsctl add-port br-int ls0-hv1 -- set Interface ls0-hv1 external-ids:iface-id=ls0-hv1 +check ovs-vsctl add-port br-int ls0-hv2 -- set Interface ls0-hv2 external-ids:iface-id=ls0-hv2 + +check ovn-nbctl lr-add lr0 + +check ovn-nbctl ls-add ls0 +check ovn-nbctl lsp-add ls0 ls0-lr0 +check ovn-nbctl lsp-set-type ls0-lr0 router +check ovn-nbctl lsp-set-addresses ls0-lr0 router +check ovn-nbctl lrp-add lr0 lr0-ls0 00:00:00:00:00:01 10.0.0.1 + +check ovn-nbctl lsp-add ls0 ls0-hv1 +check ovn-nbctl lsp-set-addresses ls0-hv1 "00:00:00:00:00:02 10.0.0.2" + +check ovn-nbctl lsp-add ls0 ls0-hv2 +check ovn-nbctl lsp-set-addresses ls0-hv2 "00:00:00:00:00:03 10.0.0.3" + +check ovn-nbctl lrp-add lr0 lrp-gw 01:00:00:00:00:01 172.16.0.1 +check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1 + +check ovn-nbctl --wait=hv sync + +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list) +echo "$ct_zones" + +port1_zone=$(get_zone_num "$ct_zones" ls0-hv1) +port2_zone=$(get_zone_num "$ct_zones" ls0-hv2) + +lr_uuid=$(fetch_column Datapath_Binding _uuid external_ids:name=lr0) +snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat) +echo "snat_zone is $snat_zone" + +check test "$port1_zone" -ne "$port2_zone" +check test "$port2_zone" -ne "$snat_zone" +check test "$port1_zone" -ne "$snat_zone" + +OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone]) +OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone]) +OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone]) + +# Now purposely request an SNAT zone for lr0 that conflicts with a zone +# currently assigned to a logical port + +snat_req_zone=$port1_zone +check ovn-nbctl set Logical_Router lr0 options:snat-ct-zone=$snat_req_zone +ovn-nbctl --wait=hv sync + +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list) +echo "$ct_zones" + +port1_zone=$(get_zone_num "$ct_zones" ls0-hv1) +port2_zone=$(get_zone_num "$ct_zones" ls0-hv2) +snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat) + +check test "$snat_zone" -eq "$snat_req_zone" +check test "$port1_zone" -ne "$port2_zone" +check test "$port2_zone" -ne "$snat_zone" +check test "$port1_zone" -ne "$snat_zone" + +OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone]) +OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone]) +OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone]) + +# Now create a conflict in the OVSDB and restart ovn-controller. + +ovs-appctl -t ovn-controller exit --restart + +ovs-vsctl set bridge br-int external_ids:ct-zone-ls0-hv1="$snat_req_zone" +ovs-vsctl set bridge br-int external_ids:ct-zone-ls0-hv2="$snat_req_zone" + +start_daemon ovn-controller + +ovn-nbctl --wait=hv sync + +ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list) +echo "$ct_zones" + +port1_zone=$(get_zone_num "$ct_zones" ls0-hv1) +port2_zone=$(get_zone_num "$ct_zones" ls0-hv2) +snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat) + +check test "$snat_zone" -eq "$snat_req_zone" +check test "$port1_zone" -ne "$port2_zone" +check test "$port2_zone" -ne "$snat_zone" +check test "$port1_zone" -ne "$snat_zone" + +OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone]) +OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone]) +OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP -- 2.37.3 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
