If a single zone creates a lot of churn we would have otherwise blocked
the clean thread for quite a while.

The approach with cmap_position might have the drawback that we
potentially skip some connections that have been inserted in the
meantime, but cmap_cursor can not be used as we have a rcu quiesce
period in between iterations.

When using the same testcases as in the previous commit
Below lists the total time for the testrun with these various configs.
|          | Without this patch | With this patch |
| Config 1 | 17.9 s             | 31.7 s          |
| Config 2 | 22.9 s             | 32.6 s          |
| Config 3 | 19.2 s             | 42.2 s          |
| Config 4 | 18.5 s             | 16.2 s          |
| Config 5 | 47.0 s             | 45.3 s          |
| Config 6 | 51.1 s             | 45.1 s          |

Obviously we are worse in most cases. However this patch is necessary to
ensure even a single loaded zone will not cause the clean thread to
take forever and thereby block rcu cleanup. Future commits will make
this fast again.

Signed-off-by: Felix Huettner <[email protected]>
---
 lib/conntrack-private.h | 11 ++++++++---
 lib/conntrack.c         | 38 ++++++++++++++++++++++++++++----------
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 8282bbe5a..656222086 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -215,9 +215,13 @@ struct conntrack {
     atomic_uint32_t default_zone_limit;
 
     uint32_t hash_basis; /* Salt for hashing a connection key. */
+
+    /* Background cleanup thread. */
     pthread_t clean_thread; /* Periodically cleans up connection tracker. */
     struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
-    unsigned int next_clean_zone; /* Next zone where the clean should run. */
+    uint16_t current_clean_zone; /* Current zone where the clean should run. */
+    /* The position in the cmap of the current zone. */
+    struct cmap_position *current_clean_position;
 
     /* Counting connections. */
     atomic_count n_conn; /* Number of connections currently tracked. */
@@ -239,8 +243,9 @@ struct conntrack {
 
 /* Lock acquisition order:
  *    1. 'conn->lock'
- *    2. 'ct_lock'
- *    3. 'resources_lock'
+ *    2. 'zone_lock'
+ *    3. 'ct_lock'
+ *    4. 'resources_lock'
  */
 
 extern struct ct_l4_proto ct_proto_tcp;
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 6d28cffe3..7edcf8cc4 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -519,6 +519,10 @@ conntrack_destroy(struct conntrack *ct)
     ovs_mutex_unlock(&ct->resources_lock);
     ovs_mutex_destroy(&ct->resources_lock);
 
+    if (ct->current_clean_position) {
+        free(ct->current_clean_position);
+    }
+
     ipf_destroy(ct->ipf);
     free(ct);
 }
@@ -1485,18 +1489,32 @@ conntrack_get_sweep_interval(struct conntrack *ct)
 
 static size_t
 ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now,
-              size_t *cleaned_count)
+              size_t *cleaned_count, size_t limit,
+              struct cmap_position **current_position)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     struct conn_key_node *keyn;
     struct conntrack_zone *cz;
     unsigned int conn_count = 0;
-    unsigned int cleaned = 0;
     struct conn *conn;
+    struct cmap_node *node;
     long long expiration;
 
     cz = zone_lookup(ct, zone);
-    CMAP_FOR_EACH (keyn, cm_node, &cz->conns) {
+    if (atomic_count_get(&cz->count) == 0) {
+        return conn_count;
+    }
+
+    if (!*current_position) {
+        *current_position = xzalloc(sizeof(**current_position));
+    }
+
+    while ((node = cmap_next_position(&cz->conns, *current_position))) {
+        keyn = OBJECT_CONTAINING(node, keyn, cm_node);
+        if (conn_count > limit) {
+            return conn_count;
+        }
+
         if (keyn->dir != CT_DIR_FWD) {
             continue;
         }
@@ -1505,12 +1523,14 @@ ct_sweep_zone(struct conntrack *ct, uint16_t zone, long 
long now,
         expiration = conn_expiration(conn);
         if (now >= expiration) {
             conn_clean(ct, conn);
-            cleaned++;
+            (*cleaned_count)++;
          }
 
         conn_count++;
     }
-    *cleaned_count = cleaned;
+
+    free(*current_position);
+    *current_position = NULL;
     return conn_count;
 }
 
@@ -1524,7 +1544,6 @@ conntrack_clean(struct conntrack *ct, long long now)
     unsigned int n_conn_limit, i;
     size_t clean_end, count = 0;
     size_t total_cleaned = 0;
-    uint16_t current_zone = ct->next_clean_zone;
 
     atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
     clean_end = n_conn_limit / 64;
@@ -1537,16 +1556,15 @@ conntrack_clean(struct conntrack *ct, long long now)
             break;
         }
 
-        count += ct_sweep_zone(ct, current_zone, now, &cleaned);
+        count += ct_sweep_zone(ct, ct->current_clean_zone, now, &cleaned,
+                               clean_end - count, &ct->current_clean_position);
         total_cleaned += cleaned;
 
         /* This will overflow and thereby allow us to iterate through all
          * zones. */
-        current_zone++;
+        ct->current_clean_zone++;
     }
 
-    ct->next_clean_zone = current_zone + 1;
-
     VLOG_DBG("conntrack cleaned %"PRIuSIZE" entries out of %"PRIuSIZE
              " entries in %lld msec", total_cleaned, count,
              time_msec() - now);
-- 
2.43.0


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to