An issue was filed where CT zone 0 was assigned both to a logical router
SNAT and to a logical port. CT zone 0 is typically "reserved" and not
assigned by ovn-controller; however, since SNAT zones are configurable,
it is possible for ovn-controller to assign this zone at the CMS's
request. This accounts for how CT zone 0 can be assigned for SNAT. There
was also a small bug in the incremental processing that could result in
a logical port being assigned zone 0.
In the specific issue report, CT zones were restored from the OVSDB when
ovn-controller started, and the conflicting CT zones were already
present. ovn-controller dutifully loaded these zones up. But then there
was nothing that would allow for the conflict to be resolved afterwards.
It is unknown how these conflicts entered into the OVSDB in the first
place. This change does not purport to prevent conflicts from entering
the OVSDB. However, it does make the following changes that should
further safeguard against unwanted behavior:
* ct_zones_runtime_data_handler() now assigns zones starting at
1 instead of 0. This makes it use the same range as update_ct_zones().
* update_ct_zones() now keeps a new simap for zones that are assigned
but not due to an SNAT zone request. This guarantees that when there
is a conflict between a previously auto-assigned CT zone and a
newly-requested zone, the auto-assigned CT zone is removed.
* The removal of conflicting auto-assigned CT zones is now performed
before dealing with the newly requested zone. As a result, if we load
conflicting zones from the OVSDB, or if some other issue results in
conflicting zones being assigned, we will correct the issue.
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2126406
Signed-off-by: Mark Michelson <[email protected]>
---
controller/ovn-controller.c | 55 +++++++++--------
tests/ovn-controller.at | 116 ++++++++++++++++++++++++++++++++++++
2 files changed, 143 insertions(+), 28 deletions(-)
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 8895c7a2b..f8b1056e1 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -659,7 +659,8 @@ update_ct_zones(const struct shash *binding_lports,
const char *user;
struct sset all_users = SSET_INITIALIZER(&all_users);
struct simap req_snat_zones = SIMAP_INITIALIZER(&req_snat_zones);
- unsigned long unreq_snat_zones[BITMAP_N_LONGS(MAX_CT_ZONES)];
+ unsigned long unreq_snat_zones_map[BITMAP_N_LONGS(MAX_CT_ZONES)];
+ struct simap unreq_snat_zones = SIMAP_INITIALIZER(&unreq_snat_zones);
struct shash_node *shash_node;
SHASH_FOR_EACH (shash_node, binding_lports) {
@@ -696,49 +697,46 @@ update_ct_zones(const struct shash *binding_lports,
bitmap_set0(ct_zone_bitmap, ct_zone->data);
simap_delete(ct_zones, ct_zone);
} else if (!simap_find(&req_snat_zones, ct_zone->name)) {
- bitmap_set1(unreq_snat_zones, ct_zone->data);
+ bitmap_set1(unreq_snat_zones_map, ct_zone->data);
+ simap_put(&unreq_snat_zones, ct_zone->name, ct_zone->data);
}
}
/* Prioritize requested CT zones */
struct simap_node *snat_req_node;
SIMAP_FOR_EACH (snat_req_node, &req_snat_zones) {
- struct simap_node *node = simap_find(ct_zones, snat_req_node->name);
- if (node) {
- if (node->data == snat_req_node->data) {
- /* No change to this request, so no action needed */
- continue;
- } else {
- /* Zone request has changed for this node. delete old entry */
- bitmap_set0(ct_zone_bitmap, node->data);
- simap_delete(ct_zones, node);
- }
- }
-
/* Determine if someone already had this zone auto-assigned.
* If so, then they need to give up their assignment since
* that zone is being explicitly requested now.
*/
- if (bitmap_is_set(unreq_snat_zones, snat_req_node->data)) {
- struct simap_node *dup;
- SIMAP_FOR_EACH_SAFE (dup, ct_zones) {
- if (dup != snat_req_node && dup->data == snat_req_node->data) {
- simap_delete(ct_zones, dup);
- break;
+ if (bitmap_is_set(unreq_snat_zones_map, snat_req_node->data)) {
+ struct simap_node *unreq_node;
+ SIMAP_FOR_EACH_SAFE (unreq_node, &unreq_snat_zones) {
+ if (unreq_node->data == snat_req_node->data) {
+ simap_find_and_delete(ct_zones, unreq_node->name);
+ simap_delete(&unreq_snat_zones, unreq_node);
}
}
+
/* Set this bit to 0 so that if multiple datapaths have requested
* this zone, we don't needlessly double-detect this condition.
*/
- bitmap_set0(unreq_snat_zones, snat_req_node->data);
+ bitmap_set0(unreq_snat_zones_map, snat_req_node->data);
}
- add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
- snat_req_node->data, true,
- snat_req_node->name);
-
- bitmap_set1(ct_zone_bitmap, snat_req_node->data);
- simap_put(ct_zones, snat_req_node->name, snat_req_node->data);
+ struct simap_node *node = simap_find(ct_zones, snat_req_node->name);
+ if (node) {
+ if (node->data != snat_req_node->data) {
+ /* Zone request has changed for this node. Release the old zone
+ * and queue the newly requested one. */
+ add_pending_ct_zone_entry(pending_ct_zones, CT_ZONE_OF_QUEUED,
+ snat_req_node->data, true,
+ snat_req_node->name);
+ bitmap_set0(ct_zone_bitmap, node->data);
+ }
+ bitmap_set1(ct_zone_bitmap, snat_req_node->data);
+ node->data = snat_req_node->data;
+ }
}
/* xxx This is wasteful to assign a zone to each port--even if no
@@ -756,6 +754,7 @@ update_ct_zones(const struct shash *binding_lports,
}
simap_destroy(&req_snat_zones);
+ simap_destroy(&unreq_snat_zones);
sset_destroy(&all_users);
}
@@ -2178,7 +2177,7 @@ ct_zones_runtime_data_handler(struct engine_node *node,
void *data)
struct hmap *tracked_dp_bindings = &rt_data->tracked_dp_bindings;
struct tracked_datapath *tdp;
- int scan_start = 0;
+ int scan_start = 1;
bool updated = false;
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 3c3fb31c7..25d420936 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2337,3 +2337,119 @@ done
AT_CHECK([grep "deleted interface patch" hv1/ovs-vswitchd.log], [1], [ignore])
OVN_CLEANUP([hv1])
AT_CLEANUP
+
+AT_SETUP([ovn-controller - resolve CT zone conflicts from ovsdb])
+
+ovn_start
+
+net_add n1
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+get_zone_num () {
+ output=$1
+ name=$2
+ printf "$output" | grep $name | cut -d ' ' -f 2
+}
+
+check_ovsdb_zone() {
+ name=$1
+ ct_zone=$2
+ db_zone=$(ovs-vsctl get Bridge br-int external_ids:ct-zone-${name} | sed -e
's/^"//' -e 's/"$//')
+ test $ct_zone -eq $db_zone
+}
+
+check ovs-vsctl add-port br-int ls0-hv1 -- set Interface ls0-hv1
external-ids:iface-id=ls0-hv1
+check ovs-vsctl add-port br-int ls0-hv2 -- set Interface ls0-hv2
external-ids:iface-id=ls0-hv2
+
+check ovn-nbctl lr-add lr0
+
+check ovn-nbctl ls-add ls0
+check ovn-nbctl lsp-add ls0 ls0-lr0
+check ovn-nbctl lsp-set-type ls0-lr0 router
+check ovn-nbctl lsp-set-addresses ls0-lr0 router
+check ovn-nbctl lrp-add lr0 lr0-ls0 00:00:00:00:00:01 10.0.0.1
+
+check ovn-nbctl lsp-add ls0 ls0-hv1
+check ovn-nbctl lsp-set-addresses ls0-hv1 "00:00:00:00:00:02 10.0.0.2"
+
+check ovn-nbctl lsp-add ls0 ls0-hv2
+check ovn-nbctl lsp-set-addresses ls0-hv2 "00:00:00:00:00:03 10.0.0.3"
+
+check ovn-nbctl lrp-add lr0 lrp-gw 01:00:00:00:00:01 172.16.0.1
+check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1
+
+check ovn-nbctl --wait=hv sync
+
+ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
+echo "$ct_zones"
+
+port1_zone=$(get_zone_num "$ct_zones" ls0-hv1)
+port2_zone=$(get_zone_num "$ct_zones" ls0-hv2)
+
+lr_uuid=$(fetch_column Datapath_Binding _uuid external_ids:name=lr0)
+snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat)
+echo "snat_zone is $snat_zone"
+
+check test "$port1_zone" -ne "$port2_zone"
+check test "$port2_zone" -ne "$snat_zone"
+check test "$port1_zone" -ne "$snat_zone"
+
+OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone])
+OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone])
+OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone])
+
+# Now purposely request an SNAT zone for lr0 that conflicts with a zone
+# currently assigned to a logical port
+
+snat_req_zone=$port1_zone
+check ovn-nbctl set Logical_Router lr0 options:snat-ct-zone=$snat_req_zone
+ovn-nbctl --wait=hv sync
+
+ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
+echo "$ct_zones"
+
+port1_zone=$(get_zone_num "$ct_zones" ls0-hv1)
+port2_zone=$(get_zone_num "$ct_zones" ls0-hv2)
+snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat)
+
+check test "$snat_zone" -eq "$snat_req_zone"
+check test "$port1_zone" -ne "$port2_zone"
+check test "$port2_zone" -ne "$snat_zone"
+check test "$port1_zone" -ne "$snat_zone"
+
+OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone])
+OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone])
+OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone])
+
+# Now create a conflict in the OVSDB and restart ovn-controller.
+
+ovs-appctl -t ovn-controller exit --restart