From: Ankur Sharma <ankur.sha...@nutanix.com> BACKGROUND: a. ovn-controller assigns CT ZONES for local ports and datapaths. b. If a local port/datapath is cleaned up from a chassis, then the corresponding CT ZONE is "unassigned"/"freed up".
ISSUE: The above logic and implementation leave stale CT entries in the datapath, which may get reused unexpectedly, thereby causing issues such as packets going through ct_nat(SNAT_IP_NEW) and getting a stale IP as the SNAT IP, etc. a. As a part of CT ZONE unassignment, the implementation should FLUSH the corresponding CT entries, i.e. it should do a FLUSH by ZONE. As of now, the implementation skips the flushing, thereby leaving stale CT entries. b. Similarly, the implementation relies on datapath existence for assigning/unassigning CT ZONEs. Hence, simple operations like moving the logical router from one external logical switch to another may not cause any CT ZONE reassignment, and thereby stale CT entries might get consumed when they should not be. c. a. and b. combined cause the following: i. Start to-be-SNATed traffic from an internal endpoint to an external endpoint. Let us say the internal endpoint IP is 50.0.0.10, the external endpoint IP is 8.8.8.8 and the logical router port IP (and hence the SNAT IP) is 100.0.0.10. ii. Detach the logical router from the old external logical switch and attach it to a new external logical switch. As a result of this operation, the new router port IP becomes 200.0.0.10, which also becomes the new SNAT IP. iii. The observation has been that traffic initiated in i. above still ends up using the OLD SNAT IP, i.e. 100.0.0.10, rather than 200.0.0.10. iv. iii. above happens because, although in the OVS DP the IP for the NAT action is 200.0.0.10, the traffic is ongoing, so the existing CT entries are used and end up NATing to the old SNAT IP 100.0.0.10. For example: OVS DP STATE recirc_id(0),in_port(16),....ct(commit,zone=1,nat(src=200.0.0.10)) CT STATE icmp,orig=(src=50.0.0.10,dst=8.8.8.8,id=2288,type=8,code=0), reply=(src=8.8.8.8,dst=100.0.0.10,id=2288,type=0,code=0),zone=1 FIX: This patch improves the overall CT ZONE management by doing the following: a. Do a FLUSH by CT ZONE, once we identify that a zone has to be freed up. b. 
From the datapath perspective, restrict the CT ZONE assignment ONLY to logical routers that have NAT rules enabled. c. Instead of using the logical router uuid as the ct zone key, use the combination of the logical router and the logical router port that connects to the external logical switch. Signed-off-by: Ankur Sharma <ankur.sha...@nutanix.com> --- controller/ovn-controller.c | 37 +++++++++++++++++++++++++++---------- controller/physical.c | 18 ++++++++++++------ lib/ovn-util.c | 10 ++++++---- lib/ovn-util.h | 3 ++- 4 files changed, 47 insertions(+), 21 deletions(-) diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 5ca32ac..9a6746e 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -521,17 +521,34 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths, sset_add(&all_users, user); } - /* Local patched datapath (gateway routers) need zones assigned. */ + /* Local patched datapath (gateway routers) need zones assigned. + * Only local logical routers with atleast one NAT rule are considered for + * CT zone assignment.*/ const struct local_datapath *ld; HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { - /* XXX Add method to limit zone assignment to logical router - * datapaths with NAT */ - char *dnat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "dnat"); - char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "snat"); - sset_add(&all_users, dnat); - sset_add(&all_users, snat); - free(dnat); - free(snat); + const char *dp_nblr = smap_get(&ld->datapath->external_ids, + "logical-router"); + if (dp_nblr) { + for (size_t iter = 0; iter < ld->n_peer_ports; iter++) { + const struct sbrec_port_binding *peer_binding = + ld->peer_ports[iter].remote; + const struct sbrec_port_binding *local_binding = + ld->peer_ports[iter].local; + + if (peer_binding->nat_addresses) { + char *dnat = alloc_nat_zone_key(&ld->datapath->header_.uuid, + &local_binding->header_.uuid, + "dnat"); + char *snat = 
alloc_nat_zone_key(&ld->datapath->header_.uuid, + &local_binding->header_.uuid, + "snat"); + sset_add(&all_users, dnat); + sset_add(&all_users, snat); + free(dnat); + free(snat); + } + } + } } /* Delete zones that do not exist in above sset. */ @@ -541,7 +558,7 @@ update_ct_zones(const struct sset *lports, const struct hmap *local_datapaths, ct_zone->data, ct_zone->name); struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending); - pending->state = CT_ZONE_DB_QUEUED; /* Skip flushing zone. */ + pending->state = CT_ZONE_OF_QUEUED; pending->zone = ct_zone->data; pending->add = false; shash_add(pending_ct_zones, ct_zone->name, pending); diff --git a/controller/physical.c b/controller/physical.c index 535c777..cc497e0 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -218,18 +218,24 @@ static struct zone_ids get_zone_ids(const struct sbrec_port_binding *binding, const struct simap *ct_zones) { - struct zone_ids zone_ids; + struct zone_ids zone_ids = {0}; zone_ids.ct = simap_get(ct_zones, binding->logical_port); - const struct uuid *key = &binding->datapath->header_.uuid; + const struct uuid *key1 = &binding->datapath->header_.uuid; + const struct uuid *key2 = &binding->header_.uuid; - char *dnat = alloc_nat_zone_key(key, "dnat"); - zone_ids.dnat = simap_get(ct_zones, dnat); + char *dnat = alloc_nat_zone_key(key1, key2, "dnat"); + + if (simap_contains(ct_zones, dnat)) { + zone_ids.dnat = simap_get(ct_zones, dnat); + } free(dnat); - char *snat = alloc_nat_zone_key(key, "snat"); - zone_ids.snat = simap_get(ct_zones, snat); + char *snat = alloc_nat_zone_key(key1, key2, "snat"); + if (simap_contains(ct_zones, snat)) { + zone_ids.snat = simap_get(ct_zones, snat); + } free(snat); return zone_ids; diff --git a/lib/ovn-util.c b/lib/ovn-util.c index cdb5e18..cba7355 100644 --- a/lib/ovn-util.c +++ b/lib/ovn-util.c @@ -327,14 +327,16 @@ destroy_lport_addresses(struct lport_addresses *laddrs) free(laddrs->ipv6_addrs); } -/* Allocates a key for NAT conntrack 
zone allocation for a provided - * 'key' record and a 'type'. +/* Allocates a key for NAT conntrack zone allocation for provided + * 'keys' and a 'type'. * * It is the caller's responsibility to free the allocated memory. */ char * -alloc_nat_zone_key(const struct uuid *key, const char *type) +alloc_nat_zone_key(const struct uuid *key1, const struct uuid *key2, + const char *type) { - return xasprintf(UUID_FMT"_%s", UUID_ARGS(key), type); + return xasprintf(UUID_FMT"_"UUID_FMT"_%s", UUID_ARGS(key1), + UUID_ARGS(key2), type); } const char * diff --git a/lib/ovn-util.h b/lib/ovn-util.h index 0f7b501..fe86bf8 100644 --- a/lib/ovn-util.h +++ b/lib/ovn-util.h @@ -77,7 +77,8 @@ bool extract_sbrec_binding_first_mac(const struct sbrec_port_binding *binding, void destroy_lport_addresses(struct lport_addresses *); -char *alloc_nat_zone_key(const struct uuid *key, const char *type); +char *alloc_nat_zone_key(const struct uuid *key1, const struct uuid *key2, + const char *type); const char *default_nb_db(void); const char *default_sb_db(void); -- 1.8.3.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev