If a lot of conn entries expired in a single zone we previously took the
zone_lock for each conn entry we deleted. This caused a lot of churn on
the lock, especially if there are parallel inserts.

We fix this by batching the deletions on 100 entries and taking a lock
for all of them at once.

When using the same testcases as in the previous commits
Below lists the total time for the testrun with these various configs.
|          | Without this patch | With this patch |
| Config 1 | 31.7 s             | 31.0 s          |
| Config 2 | 32.6 s             | 31.6 s          |
| Config 3 | 42.2 s             | 7.8 s           |
| Config 4 | 16.2 s             | 15.9 s          |
| Config 5 | 45.3 s             | 45.7 s          |
| Config 6 | 45.1 s             | 46.1 s          |

Signed-off-by: Felix Huettner <[email protected]>
---
 lib/conntrack.c | 65 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 7edcf8cc4..da7ed0c77 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -429,34 +429,34 @@ zone_limit_update(struct conntrack *ct, int32_t zone, 
uint32_t limit)
 }
 
 static void
-conn_clean__(struct conntrack *ct, struct conn *conn)
+conn_clean_protected__(struct conntrack *ct, struct conntrack_zone *cz,
+                       struct conn *conn)
+    OVS_REQUIRES(cz->zone_lock)
 {
-    uint16_t fwd_zone;
-    struct conntrack_zone *cz;
     uint32_t hash;
 
+    COVERAGE_INC(conntrack_remove);
+
     if (conn->alg) {
         expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key);
     }
 
-    fwd_zone = conn->key_node[CT_DIR_FWD].key.zone;
-    cz = zone_lookup(ct, fwd_zone);
     hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis);
-    ovs_mutex_lock(&cz->zone_lock);
     cmap_remove(&cz->conns,
                 &conn->key_node[CT_DIR_FWD].cm_node, hash);
     atomic_count_dec(&cz->count);
 
     if (conn->nat_action) {
-        ovs_assert(fwd_zone == conn->key_node[CT_DIR_REV].key.zone);
+        ovs_assert(conn->key_node[CT_DIR_FWD].key.zone ==
+                   conn->key_node[CT_DIR_REV].key.zone);
         hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key,
                              ct->hash_basis);
-
         cmap_remove(&cz->conns,
                     &conn->key_node[CT_DIR_REV].cm_node, hash);
     }
 
-    ovs_mutex_unlock(&cz->zone_lock);
+    ovsrcu_postpone(delete_conn, conn);
+    atomic_count_dec(&ct->n_conn);
 }
 
 /* Also removes the associated nat 'conn' from the lookup
@@ -469,11 +469,27 @@ conn_clean(struct conntrack *ct, struct conn *conn)
         return;
     }
 
-    COVERAGE_INC(conntrack_remove);
-    conn_clean__(ct, conn);
+    struct conntrack_zone *cz = zone_lookup(ct,
+        conn->key_node[CT_DIR_FWD].key.zone);
 
-    ovsrcu_postpone(delete_conn, conn);
-    atomic_count_dec(&ct->n_conn);
+    ovs_mutex_lock(&cz->zone_lock);
+    conn_clean_protected__(ct, cz, conn);
+    ovs_mutex_unlock(&cz->zone_lock);
+}
+
+/* Also removes the associated nat 'conn' from the lookup
+   datastructures. */
+static void
+conn_clean_protected(struct conntrack *ct, struct conntrack_zone *cz,
+                     struct conn *conn)
+    OVS_EXCLUDED(conn->lock)
+    OVS_REQUIRES(cz->zone_lock)
+{
+    if (atomic_flag_test_and_set(&conn->reclaimed)) {
+        return;
+    }
+
+    conn_clean_protected__(ct, cz, conn);
 }
 
 static void
@@ -1487,6 +1503,18 @@ conntrack_get_sweep_interval(struct conntrack *ct)
     return ms;
 }
 
+static void
+ct_sweep_buf(struct conntrack *ct, uint16_t zone, struct conn *conn_buf[100],
+             unsigned int n_conn_buf)
+{
+    struct conntrack_zone *cz = zone_lookup(ct, zone);
+    ovs_mutex_lock(&cz->zone_lock);
+    for (unsigned i = 0; i < n_conn_buf; i++) {
+        conn_clean_protected(ct, cz, conn_buf[i]);
+    }
+    ovs_mutex_unlock(&cz->zone_lock);
+}
+
 static size_t
 ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now,
               size_t *cleaned_count, size_t limit,
@@ -1500,6 +1528,10 @@ ct_sweep_zone(struct conntrack *ct, uint16_t zone, long 
long now,
     struct cmap_node *node;
     long long expiration;
 
+#define CONN_BUF_SIZE 100
+    struct conn *conn_buf[CONN_BUF_SIZE];
+    unsigned int n_conn_buf = 0;
+
     cz = zone_lookup(ct, zone);
     if (atomic_count_get(&cz->count) == 0) {
         return conn_count;
@@ -1522,12 +1554,17 @@ ct_sweep_zone(struct conntrack *ct, uint16_t zone, long 
long now,
         conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]);
         expiration = conn_expiration(conn);
         if (now >= expiration) {
-            conn_clean(ct, conn);
+            conn_buf[n_conn_buf++] = conn;
             (*cleaned_count)++;
          }
+        if (n_conn_buf == CONN_BUF_SIZE) {
+            ct_sweep_buf(ct, zone, conn_buf, n_conn_buf);
+            n_conn_buf = 0;
+        }
 
         conn_count++;
     }
+    ct_sweep_buf(ct, zone, conn_buf, n_conn_buf);
 
     free(*current_position);
     *current_position = NULL;
-- 
2.43.0


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to