If a lot of conn entries expired in a single zone we previously took the zone_lock for each conn entry we deleted. This caused a lot of churn on the lock, especially if there are parallel inserts.
We fix this by batching the deletions on 100 entries and taking a lock for all of them at once. When using the same testcases as in the previous commits Below lists the total time for the testrun with these various configs. | | Without this patch | With this patch | | Config 1 | 31.7 s | 31.0 s | | Config 2 | 32.6 s | 31.6 s | | Config 3 | 42.2 s | 7.8 s | | Config 4 | 16.2 s | 15.9 s | | Config 5 | 45.3 s | 45.7 s | | Config 6 | 45.1 s | 46.1 s | Signed-off-by: Felix Huettner <[email protected]> --- lib/conntrack.c | 65 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/lib/conntrack.c b/lib/conntrack.c index 7edcf8cc4..da7ed0c77 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -429,34 +429,34 @@ zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit) } static void -conn_clean__(struct conntrack *ct, struct conn *conn) +conn_clean_protected__(struct conntrack *ct, struct conntrack_zone *cz, + struct conn *conn) + OVS_REQUIRES(cz->zone_lock) { - uint16_t fwd_zone; - struct conntrack_zone *cz; uint32_t hash; + COVERAGE_INC(conntrack_remove); + if (conn->alg) { expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key); } - fwd_zone = conn->key_node[CT_DIR_FWD].key.zone; - cz = zone_lookup(ct, fwd_zone); hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis); - ovs_mutex_lock(&cz->zone_lock); cmap_remove(&cz->conns, &conn->key_node[CT_DIR_FWD].cm_node, hash); atomic_count_dec(&cz->count); if (conn->nat_action) { - ovs_assert(fwd_zone == conn->key_node[CT_DIR_REV].key.zone); + ovs_assert(conn->key_node[CT_DIR_FWD].key.zone == + conn->key_node[CT_DIR_REV].key.zone); hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key, ct->hash_basis); - cmap_remove(&cz->conns, &conn->key_node[CT_DIR_REV].cm_node, hash); } - ovs_mutex_unlock(&cz->zone_lock); + ovsrcu_postpone(delete_conn, conn); + atomic_count_dec(&ct->n_conn); } /* Also removes the associated nat 'conn' from the lookup @@ -469,11 +469,27 @@ conn_clean(struct conntrack *ct, struct conn *conn) return; } - COVERAGE_INC(conntrack_remove); - conn_clean__(ct, conn); + struct conntrack_zone *cz = zone_lookup(ct, + conn->key_node[CT_DIR_FWD].key.zone); - ovsrcu_postpone(delete_conn, conn); - atomic_count_dec(&ct->n_conn); + ovs_mutex_lock(&cz->zone_lock); + conn_clean_protected__(ct, cz, conn); + ovs_mutex_unlock(&cz->zone_lock); +} + +/* Also removes the associated nat 'conn' from the lookup + datastructures. */ +static void +conn_clean_protected(struct conntrack *ct, struct conntrack_zone *cz, + struct conn *conn) + OVS_EXCLUDED(conn->lock) + OVS_REQUIRES(cz->zone_lock) +{ + if (atomic_flag_test_and_set(&conn->reclaimed)) { + return; + } + + conn_clean_protected__(ct, cz, conn); } static void @@ -1487,6 +1503,18 @@ conntrack_get_sweep_interval(struct conntrack *ct) return ms; } +static void +ct_sweep_buf(struct conntrack *ct, uint16_t zone, struct conn *conn_buf[100], + unsigned int n_conn_buf) +{ + struct conntrack_zone *cz = zone_lookup(ct, zone); + ovs_mutex_lock(&cz->zone_lock); + for (unsigned i = 0; i < n_conn_buf; i++) { + conn_clean_protected(ct, cz, conn_buf[i]); + } + ovs_mutex_unlock(&cz->zone_lock); +} + static size_t ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now, size_t *cleaned_count, size_t limit, @@ -1500,6 +1528,10 @@ ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now, struct cmap_node *node; long long expiration; +#define CONN_BUF_SIZE 100 + struct conn *conn_buf[CONN_BUF_SIZE]; + unsigned int n_conn_buf = 0; + cz = zone_lookup(ct, zone); if (atomic_count_get(&cz->count) == 0) { return conn_count; @@ -1522,12 +1554,17 @@ ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now, conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]); expiration = conn_expiration(conn); if (now >= expiration) { - conn_clean(ct, conn); + conn_buf[n_conn_buf++] = conn; (*cleaned_count)++; } + if (n_conn_buf == CONN_BUF_SIZE) { + ct_sweep_buf(ct, zone, conn_buf, n_conn_buf); + n_conn_buf = 0; + } conn_count++; } + ct_sweep_buf(ct, zone, conn_buf, n_conn_buf); free(*current_position); *current_position = NULL; -- 2.43.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
