From: Ankur Sharma <ankur.sha...@nutanix.com>

BACKGROUND:
a. ovn-controller assigns CT ZONES for local ports and datapaths.
b. If a local port/datapath is cleaned up from a chassis, then
   corresponding CT ZONE is "unassigned"/"freed" up.

ISSUE:
The above logic and implementation leave stale CT entries in the
datapath, which may get reused unexpectedly, causing issues such as
packets going through ct_nat(SNAT_IP_NEW) and getting a stale IP as
the SNAT IP.

a. As a part of CT ZONE unassign, the implementation should FLUSH the
   corresponding CT entries, i.e. it should do a FLUSH by ZONE.
   As of now, the implementation skips the flush, thereby leaving
   stale CT entries.

b. Similarly, the implementation relies on datapath existence for
   assign/unassign of CT ZONEs. Hence, simple operations like
   moving the logical router from one external logical switch to
   another may not cause any CT ZONE reassignment, and stale CT
   entries might get consumed when they should not have been.

c. a. and b. combined cause the following:
   i. Start traffic that is to be SNATed from an internal endpoint to
      an external endpoint. Let us say the internal endpoint IP is
      50.0.0.10, the external endpoint IP is 8.8.8.8 and the
      logical router port IP (and hence the SNAT IP) is 100.0.0.10.

  ii. Detach the logical router from the old external logical switch
      and attach it to a new external logical switch. As a result
      of this operation, the new router port IP becomes 200.0.0.10,
      which also becomes the new SNAT IP.

 iii. The observation has been that traffic initiated in i. above
      still ends up using the OLD SNAT IP, i.e. 100.0.0.10, rather
      than 200.0.0.10.

 iv. iii. above happens because, although the NAT action in the OVS DP
     uses 200.0.0.10, the traffic is ongoing, so the existing CT
     entries are used and end up NATing to the old SNAT IP
     100.0.0.10. For example:

     OVS DP STATE
     recirc_id(0),in_port(16),....ct(commit,zone=1,nat(src=200.0.0.10))

     CT STATE
     icmp,orig=(src=50.0.0.10,dst=8.8.8.8,id=2288,type=8,code=0),
     reply=(src=8.8.8.8,dst=100.0.0.10,id=2288,type=0,code=0),zone=1

FIX:
This patch improves the overall CT ZONE management by doing the following:
a. Do a FLUSH by CT ZONE once we identify that a zone has to be freed up.
b. From the datapath perspective, restrict the CT ZONE assignment ONLY
   to logical routers that have NAT rules enabled.
c. Instead of using the logical router UUID as the CT zone key, use the
   combination of the logical router and the logical router port that
   connects to the external logical switch.

Signed-off-by: Ankur Sharma <ankur.sha...@nutanix.com>
---
 controller/ovn-controller.c | 37 +++++++++++++++++++++++++++----------
 controller/physical.c       | 18 ++++++++++++------
 lib/ovn-util.c              | 10 ++++++----
 lib/ovn-util.h              |  3 ++-
 4 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 5ca32ac..9a6746e 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -521,17 +521,34 @@ update_ct_zones(const struct sset *lports, const struct 
hmap *local_datapaths,
         sset_add(&all_users, user);
     }
 
-    /* Local patched datapath (gateway routers) need zones assigned. */
+    /* Local patched datapath (gateway routers) need zones assigned.
+     * Only local logical routers with atleast one NAT rule are considered for
+     * CT zone assignment.*/
     const struct local_datapath *ld;
     HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
-        /* XXX Add method to limit zone assignment to logical router
-         * datapaths with NAT */
-        char *dnat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "dnat");
-        char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "snat");
-        sset_add(&all_users, dnat);
-        sset_add(&all_users, snat);
-        free(dnat);
-        free(snat);
+       const char *dp_nblr = smap_get(&ld->datapath->external_ids,
+                                      "logical-router");
+       if (dp_nblr) {
+          for (size_t iter = 0; iter < ld->n_peer_ports; iter++) {
+             const struct sbrec_port_binding *peer_binding =
+                                             ld->peer_ports[iter].remote;
+             const struct sbrec_port_binding *local_binding =
+                                             ld->peer_ports[iter].local;
+
+             if (peer_binding->nat_addresses) {
+                char *dnat = alloc_nat_zone_key(&ld->datapath->header_.uuid,
+                                                &local_binding->header_.uuid,
+                                                "dnat");
+                char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid,
+                                                &local_binding->header_.uuid,
+                                                "snat");
+                sset_add(&all_users, dnat);
+                sset_add(&all_users, snat);
+                free(dnat);
+                free(snat);
+             }
+          }
+       }
     }
 
     /* Delete zones that do not exist in above sset. */
@@ -541,7 +558,7 @@ update_ct_zones(const struct sset *lports, const struct 
hmap *local_datapaths,
                      ct_zone->data, ct_zone->name);
 
             struct ct_zone_pending_entry *pending = xmalloc(sizeof *pending);
-            pending->state = CT_ZONE_DB_QUEUED; /* Skip flushing zone. */
+            pending->state = CT_ZONE_OF_QUEUED;
             pending->zone = ct_zone->data;
             pending->add = false;
             shash_add(pending_ct_zones, ct_zone->name, pending);
diff --git a/controller/physical.c b/controller/physical.c
index 535c777..cc497e0 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -218,18 +218,24 @@ static struct zone_ids
 get_zone_ids(const struct sbrec_port_binding *binding,
              const struct simap *ct_zones)
 {
-    struct zone_ids zone_ids;
+    struct zone_ids zone_ids = {0};
 
     zone_ids.ct = simap_get(ct_zones, binding->logical_port);
 
-    const struct uuid *key = &binding->datapath->header_.uuid;
+    const struct uuid *key1 = &binding->datapath->header_.uuid;
+    const struct uuid *key2 = &binding->header_.uuid;
 
-    char *dnat = alloc_nat_zone_key(key, "dnat");
-    zone_ids.dnat = simap_get(ct_zones, dnat);
+    char *dnat = alloc_nat_zone_key(key1, key2, "dnat");
+
+    if (simap_contains(ct_zones, dnat)) {
+       zone_ids.dnat = simap_get(ct_zones, dnat);
+    }
     free(dnat);
 
-    char *snat = alloc_nat_zone_key(key, "snat");
-    zone_ids.snat = simap_get(ct_zones, snat);
+    char *snat = alloc_nat_zone_key(key1, key2, "snat");
+    if (simap_contains(ct_zones, snat)) {
+       zone_ids.snat = simap_get(ct_zones, snat);
+    }
     free(snat);
 
     return zone_ids;
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index cdb5e18..cba7355 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -327,14 +327,16 @@ destroy_lport_addresses(struct lport_addresses *laddrs)
     free(laddrs->ipv6_addrs);
 }
 
-/* Allocates a key for NAT conntrack zone allocation for a provided
- * 'key' record and a 'type'.
+/* Allocates a key for NAT conntrack zone allocation for provided
+ * 'keys' and a 'type'.
  *
  * It is the caller's responsibility to free the allocated memory. */
 char *
-alloc_nat_zone_key(const struct uuid *key, const char *type)
+alloc_nat_zone_key(const struct uuid *key1, const struct uuid *key2,
+                   const char *type)
 {
-    return xasprintf(UUID_FMT"_%s", UUID_ARGS(key), type);
+    return xasprintf(UUID_FMT"_"UUID_FMT"_%s", UUID_ARGS(key1),
+                     UUID_ARGS(key2), type);
 }
 
 const char *
diff --git a/lib/ovn-util.h b/lib/ovn-util.h
index 0f7b501..fe86bf8 100644
--- a/lib/ovn-util.h
+++ b/lib/ovn-util.h
@@ -77,7 +77,8 @@ bool extract_sbrec_binding_first_mac(const struct 
sbrec_port_binding *binding,
 
 void destroy_lport_addresses(struct lport_addresses *);
 
-char *alloc_nat_zone_key(const struct uuid *key, const char *type);
+char *alloc_nat_zone_key(const struct uuid *key1, const struct uuid *key2,
+                         const char *type);
 
 const char *default_nb_db(void);
 const char *default_sb_db(void);
-- 
1.8.3.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to