(CC'ing Felix) Aaron Conole <[email protected]> writes:
> Felix Huettner via dev <[email protected]> writes: > >> Previously modifications to the list of conntrack entries required >> taking a global lock. As zones are generally separate from each other >> there is no need for such a global lock. This allows multiple pmds to >> handle changes to different zones in parallel. >> >> During the removal of the global lock we also extracted the zone limits >> to be part of the individual zones. This skips an additional need for >> global locks. >> >> In addition this required a restructuring of conntrack_cleanup to no >> longer use rculists and instead iterate over each zone. This also allows >> to parallalize such operations in the future. >> >> The new "ovstest test-conntrack benchmark-tcp" shows the benefits >> nicely. They mostly exist on connections that are setup and teared down >> all the time (maybe as part of some portscanning or similar). >> >> When using the same testcases as in the previous commits >> Below lists the total time for the testrun with these various configs. >> | | Without this patch | With this patch | >> | Config 1 | 70.5 s | 17.9 s | >> | Config 2 | 80.7 s | 22.9 s | >> | Config 3 | 65.9 s | 19.2 s | >> | Config 4 | 71.4 s | 18.5 s | > > This is a really large change. And the following are some large > regressions: > >> | Config 5 | 43.3 s | 47.0 s | >> | Config 6 | 46.6 s | 51.1 s | > > I think they might be tied to how the new list iteration happens - IE we > now walk all connections in the zone rather than having an expiration > list. > > But I wonder how it will look with even more connections than config 6. > > Did you test with hundreds of thousands of connections? I would be more > interested in a dataset with larger numbers of connections, rather than > micro-benchmark. I suspect the architecture of this is showing that > for small numbers of connections (maybe even a small number of thousand) > things look good. > > Maybe try with something like 4 threads, and 46,877 connections per > zone? Just some way to exercise the clean thread so it needs to > continue (with the benchmarks above it barely wakes). > >> >> Signed-off-by: Felix Huettner <[email protected]> >> --- >> lib/conntrack-private.h | 36 ++-- >> lib/conntrack.c | 445 ++++++++++++++-------------------------- >> 2 files changed, 166 insertions(+), 315 deletions(-) >> >> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h >> index 0141afd4a..8282bbe5a 100644 >> --- a/lib/conntrack-private.h >> +++ b/lib/conntrack-private.h >> @@ -134,9 +134,6 @@ struct conn { >> * True as soon as a thread has started freeing >> * its memory. */ >> >> - /* Inserted once by a PMD, then managed by the 'ct_clean' thread. */ >> - struct rculist node; >> - >> /* Mutable data. */ >> struct ovs_mutex lock; /* Guards all mutable fields. */ >> ovs_u128 label; >> @@ -144,10 +141,6 @@ struct conn { >> uint32_t mark; >> int seq_skew; >> >> - /* Immutable data. */ >> - int32_t admit_zone; /* The zone for managing zone limit counts. */ >> - uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ >> - >> /* Mutable data. */ >> bool seq_skew_dir; /* TCP sequence skew direction due to NATTing of FTP >> * control messages; true if reply direction. */ >> @@ -198,32 +191,33 @@ enum ct_ephemeral_range { >> #define FOR_EACH_PORT_IN_RANGE(curr, min, max) \ >> FOR_EACH_PORT_IN_RANGE__(curr, min, max, OVS_JOIN(idx, __COUNTER__)) >> >> -#define ZONE_LIMIT_CONN_DEFAULT -1 >> +#define CONN_LIMIT_NONE -1 >> +#define CONN_LIMIT_USE_DEFAULT -2 >> >> -struct conntrack_zone_limit { >> +struct conntrack_zone { >> int32_t zone; >> - atomic_int64_t limit; >> - atomic_count count; >> - uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ >> + >> + struct ovs_mutex zone_lock; /* Protects the following fields. */ >> + struct cmap conns; >> + >> + /* Limits */ >> + atomic_int64_t limit; /* Currently active limit. */ >> + atomic_int64_t requested_limit; /* User requested limit. May be >> + * ZONE_LIMIT_CONN_DEFAULT if it should use >> + * the default limit. */ >> + atomic_count count; /* Number of connections currently tracked. */ >> }; >> >> struct conntrack { >> struct ovs_mutex ct_lock; /* Protects the following fields. */ >> - struct cmap conns[UINT16_MAX + 1]; >> - struct rculist exp_lists[N_EXP_LISTS]; >> - struct cmap zone_limits; >> + struct conntrack_zone zones[UINT16_MAX + 1]; >> struct cmap timeout_policies; >> - uint32_t zone_limit_seq OVS_GUARDED; /* Used to disambiguate zone limit >> - * counts. */ >> atomic_uint32_t default_zone_limit; >> >> uint32_t hash_basis; /* Salt for hashing a connection key. */ >> pthread_t clean_thread; /* Periodically cleans up connection tracker. */ >> struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */ >> - unsigned int next_list; /* Next list where the newly created connection >> - * gets inserted. */ >> - unsigned int next_sweep; /* List from which the gc thread will resume >> - * the sweeping. */ >> + unsigned int next_clean_zone; /* Next zone where the clean should run. >> */ >> >> /* Counting connections. */ >> atomic_count n_conn; /* Number of connections currently tracked. */ >> diff --git a/lib/conntrack.c b/lib/conntrack.c >> index ffff4111f..6d28cffe3 100644 >> --- a/lib/conntrack.c >> +++ b/lib/conntrack.c >> @@ -41,7 +41,6 @@ >> #include "ovs-thread.h" >> #include "openvswitch/poll-loop.h" >> #include "random.h" >> -#include "rculist.h" >> #include "timeval.h" >> #include "unaligned.h" >> >> @@ -89,11 +88,6 @@ enum ct_alg_ctl_type { >> CT_ALG_CTL_SIP, >> }; >> >> -struct zone_limit { >> - struct cmap_node node; >> - struct conntrack_zone_limit czl; >> -}; >> - >> static bool conn_key_extract(struct conntrack *, struct dp_packet *, >> ovs_be16 dl_type, struct conn_lookup_ctx *, >> uint16_t zone); >> @@ -111,7 +105,6 @@ static enum ct_update_res conn_update(struct > conntrack *ct, struct conn *conn, >> long long now); >> static long long int conn_expiration(const struct conn *); >> static bool conn_expired(const struct conn *, long long now); >> -static void conn_expire_push_front(struct conntrack *ct, struct conn *conn); >> static void set_mark(struct dp_packet *, struct conn *, >> uint32_t val, uint32_t mask); >> static void set_label(struct dp_packet *, struct conn *, >> @@ -267,14 +260,12 @@ conntrack_init(void) >> >> ovs_mutex_init_adaptive(&ct->ct_lock); >> ovs_mutex_lock(&ct->ct_lock); >> - for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) { >> - cmap_init(&ct->conns[i]); >> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) { >> + ovs_mutex_init_adaptive(&ct->zones[i].zone_lock); >> + cmap_init(&ct->zones[i].conns); >> + ct->zones[i].limit = CONN_LIMIT_NONE; >> + ct->zones[i].requested_limit = CONN_LIMIT_USE_DEFAULT; >> } >> - for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) { >> - rculist_init(&ct->exp_lists[i]); >> - } >> - cmap_init(&ct->zone_limits); >> - ct->zone_limit_seq = 0; >> timeout_policy_init(ct); >> ovs_mutex_unlock(&ct->ct_lock); >> >> @@ -282,7 +273,7 @@ conntrack_init(void) >> atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT); >> atomic_init(&ct->tcp_seq_chk, true); >> atomic_init(&ct->sweep_ms, 20000); >> - atomic_init(&ct->default_zone_limit, 0); >> + atomic_init(&ct->default_zone_limit, CONN_LIMIT_NONE); >> latch_init(&ct->clean_thread_exit); >> ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct); >> ct->ipf = ipf_init(); >> @@ -302,114 +293,28 @@ conntrack_init(void) >> return ct; >> } >> >> -static uint32_t >> -zone_key_hash(int32_t zone, uint32_t basis) >> -{ >> - size_t hash = hash_int((OVS_FORCE uint32_t) zone, basis); >> - return hash; >> -} >> - >> static int64_t >> -zone_limit_get_limit__(struct conntrack_zone_limit *czl) >> +zone_read_limit(struct conntrack_zone *cz) >> { >> int64_t limit; >> - atomic_read_relaxed(&czl->limit, &limit); >> - >> + atomic_read_relaxed(&cz->limit, &limit); >> return limit; >> } >> >> -static int64_t >> -zone_limit_get_limit(struct conntrack *ct, struct conntrack_zone_limit *czl) >> +static struct conntrack_zone * >> +zone_lookup(struct conntrack *ct, int32_t zone) >> { >> - int64_t limit = zone_limit_get_limit__(czl); >> - >> - if (limit == ZONE_LIMIT_CONN_DEFAULT) { >> - atomic_read_relaxed(&ct->default_zone_limit, &limit); >> - limit = limit ? limit : -1; >> + if (zone < MIN_ZONE || zone > MAX_ZONE) { >> + return NULL; >> } >> >> - return limit; >> -} >> - >> -static struct zone_limit * >> -zone_limit_lookup_protected(struct conntrack *ct, int32_t zone) >> - OVS_REQUIRES(ct->ct_lock) >> -{ >> - uint32_t hash = zone_key_hash(zone, ct->hash_basis); >> - struct zone_limit *zl; >> - CMAP_FOR_EACH_WITH_HASH_PROTECTED (zl, node, hash, &ct->zone_limits) { >> - if (zl->czl.zone == zone) { >> - return zl; >> - } >> - } >> - return NULL; >> -} >> - >> -static struct zone_limit * >> -zone_limit_lookup(struct conntrack *ct, int32_t zone) >> -{ >> - uint32_t hash = zone_key_hash(zone, ct->hash_basis); >> - struct zone_limit *zl; >> - CMAP_FOR_EACH_WITH_HASH (zl, node, hash, &ct->zone_limits) { >> - if (zl->czl.zone == zone) { >> - return zl; >> - } >> - } >> - return NULL; >> -} >> - >> -static struct zone_limit * >> -zone_limit_create__(struct conntrack *ct, int32_t zone, int64_t limit) >> - OVS_REQUIRES(ct->ct_lock) >> -{ >> - struct zone_limit *zl = NULL; >> - >> - if (zone > DEFAULT_ZONE && zone <= MAX_ZONE) { >> - zl = xmalloc(sizeof *zl); >> - atomic_init(&zl->czl.limit, limit); >> - atomic_count_init(&zl->czl.count, 0); >> - zl->czl.zone = zone; >> - zl->czl.zone_limit_seq = ct->zone_limit_seq++; >> - uint32_t hash = zone_key_hash(zone, ct->hash_basis); >> - cmap_insert(&ct->zone_limits, &zl->node, hash); >> - } >> - >> - return zl; >> -} >> - >> -static struct zone_limit * >> -zone_limit_create(struct conntrack *ct, int32_t zone, int64_t limit) >> - OVS_REQUIRES(ct->ct_lock) >> -{ >> - struct zone_limit *zl = zone_limit_lookup_protected(ct, zone); >> - >> - if (zl) { >> - return zl; >> - } >> - >> - return zone_limit_create__(ct, zone, limit); >> -} >> - >> -/* Lazily creates a new entry in the zone_limits cmap if default limit >> - * is set and there's no entry for the zone. */ >> -static struct zone_limit * >> -zone_limit_lookup_or_default(struct conntrack *ct, int32_t zone) >> - OVS_REQUIRES(ct->ct_lock) >> -{ >> - struct zone_limit *zl = zone_limit_lookup_protected(ct, zone); >> - >> - if (!zl) { >> - uint32_t limit; >> - atomic_read_relaxed(&ct->default_zone_limit, &limit); >> - >> - if (limit) { >> - zl = zone_limit_create__(ct, zone, ZONE_LIMIT_CONN_DEFAULT); >> - } >> - } >> - >> - return zl; >> + return &ct->zones[zone]; >> } >> >> +/* Returns the conntrack_zone_info for the requested zone. >> + * Note: to be compatible with the kernel implementation we return empty >> + * entries if the default limit is to be used. >> + * This can be improved upon later. */ >> struct conntrack_zone_info >> zone_limit_get(struct conntrack *ct, int32_t zone) >> { >> @@ -418,82 +323,66 @@ zone_limit_get(struct conntrack *ct, int32_t zone) >> .limit = 0, >> .count = 0, >> }; >> - struct zone_limit *zl = zone_limit_lookup(ct, zone); >> - if (zl) { >> - int64_t czl_limit = zone_limit_get_limit__(&zl->czl); >> - if (czl_limit > ZONE_LIMIT_CONN_DEFAULT) { >> - czl.zone = zl->czl.zone; >> - czl.limit = czl_limit; >> - } else { >> - atomic_read_relaxed(&ct->default_zone_limit, &czl.limit); >> - } >> - >> - czl.count = atomic_count_get(&zl->czl.count); >> - } else { >> + if (zone == DEFAULT_ZONE) { >> atomic_read_relaxed(&ct->default_zone_limit, &czl.limit); >> + } else { >> + struct conntrack_zone *cz = zone_lookup(ct, zone); >> + uint64_t req_limit; >> + atomic_read_relaxed(&cz->requested_limit, &req_limit); >> + if (req_limit != CONN_LIMIT_USE_DEFAULT) { >> + czl.zone = zone; >> + czl.limit = zone_read_limit(cz); >> + czl.count = atomic_count_get(&cz->count); >> + } >> + } >> + >> + if (czl.limit == CONN_LIMIT_NONE) { >> + czl.limit = 0; >> } >> >> return czl; >> } >> >> -static void >> -zone_limit_clean__(struct conntrack *ct, struct zone_limit *zl) >> - OVS_REQUIRES(ct->ct_lock) >> +static bool >> +zone_limit_reset(struct conntrack *ct, struct conntrack_zone *cz) >> { >> - uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis); >> - cmap_remove(&ct->zone_limits, &zl->node, hash); >> - ovsrcu_postpone(free, zl); >> -} >> - >> -static void >> -zone_limit_clean(struct conntrack *ct, struct zone_limit *zl) >> - OVS_REQUIRES(ct->ct_lock) >> -{ >> - uint32_t limit; >> + int64_t limit; >> + atomic_read_relaxed(&cz->requested_limit, &limit); >> + if (limit == CONN_LIMIT_USE_DEFAULT) { >> + return false; >> + } >> >> atomic_read_relaxed(&ct->default_zone_limit, &limit); >> - /* Do not remove the entry if the default limit is enabled, but >> - * simply move the limit to default. */ >> - if (limit) { >> - atomic_store_relaxed(&zl->czl.limit, ZONE_LIMIT_CONN_DEFAULT); >> - } else { >> - zone_limit_clean__(ct, zl); >> - } >> + atomic_store_relaxed(&cz->limit, limit); >> + atomic_store_relaxed(&cz->requested_limit, CONN_LIMIT_USE_DEFAULT); >> + return true; >> } >> >> + >> static void >> -zone_limit_clean_default(struct conntrack *ct) >> - OVS_REQUIRES(ct->ct_lock) >> +zone_limit_update_default(struct conntrack *ct, uint32_t limit) >> { >> - struct zone_limit *zl; >> - int64_t czl_limit; >> + int64_t cz_req_limit; >> >> - atomic_store_relaxed(&ct->default_zone_limit, 0); >> + atomic_store_relaxed(&ct->default_zone_limit, limit); >> >> - CMAP_FOR_EACH (zl, node, &ct->zone_limits) { >> - atomic_read_relaxed(&zl->czl.limit, &czl_limit); >> - if (zone_limit_get_limit__(&zl->czl) == ZONE_LIMIT_CONN_DEFAULT) { >> - zone_limit_clean__(ct, zl); >> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) { >> + atomic_read_relaxed(&ct->zones[i].requested_limit, &cz_req_limit); >> + if (cz_req_limit == CONN_LIMIT_USE_DEFAULT) { >> + atomic_store_relaxed(&ct->zones[i].limit, limit); >> } >> } >> } >> >> static bool >> zone_limit_delete__(struct conntrack *ct, int32_t zone) >> - OVS_REQUIRES(ct->ct_lock) >> { >> - struct zone_limit *zl = NULL; >> - >> if (zone == DEFAULT_ZONE) { >> - zone_limit_clean_default(ct); >> + zone_limit_update_default(ct, 0); >> + return false; >> } else { >> - zl = zone_limit_lookup_protected(ct, zone); >> - if (zl) { >> - zone_limit_clean(ct, zl); >> - } >> + return zone_limit_reset(ct, zone_lookup(ct, zone)); >> } >> - >> - return zl != NULL; >> } >> >> int >> @@ -501,9 +390,7 @@ zone_limit_delete(struct conntrack *ct, int32_t zone) >> { >> bool deleted; >> >> - ovs_mutex_lock(&ct->ct_lock); >> deleted = zone_limit_delete__(ct, zone); >> - ovs_mutex_unlock(&ct->ct_lock); >> >> if (zone != DEFAULT_ZONE) { >> VLOG_INFO(deleted >> @@ -515,45 +402,27 @@ zone_limit_delete(struct conntrack *ct, int32_t zone) >> return 0; >> } >> >> -static void >> -zone_limit_update_default(struct conntrack *ct, int32_t zone, uint32_t >> limit) >> -{ >> - /* limit zero means delete default. */ >> - if (limit == 0) { >> - ovs_mutex_lock(&ct->ct_lock); >> - zone_limit_delete__(ct, zone); >> - ovs_mutex_unlock(&ct->ct_lock); >> - } else { >> - atomic_store_relaxed(&ct->default_zone_limit, limit); >> - } >> -} >> - >> int >> zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit) >> { >> - struct zone_limit *zl; >> + struct conntrack_zone *cz; >> int err = 0; >> >> if (zone == DEFAULT_ZONE) { >> - zone_limit_update_default(ct, zone, limit); >> + zone_limit_update_default(ct, limit); >> VLOG_INFO("Set default zone limit to %u", limit); >> return err; >> } >> >> - zl = zone_limit_lookup(ct, zone); >> - if (zl) { >> - atomic_store_relaxed(&zl->czl.limit, limit); >> - VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone); >> + cz = zone_lookup(ct, zone); >> + if (cz) { >> + atomic_store_relaxed(&cz->limit, limit); >> + atomic_store_relaxed(&cz->requested_limit, limit); >> + VLOG_INFO("Set zone limit of %u for zone %d", limit, zone); >> } else { >> - ovs_mutex_lock(&ct->ct_lock); >> - err = zone_limit_create(ct, zone, limit) == NULL; >> - ovs_mutex_unlock(&ct->ct_lock); >> - if (!err) { >> - VLOG_INFO("Created zone limit of %u for zone %d", limit, zone); >> - } else { >> - VLOG_WARN("Request to create zone limit for invalid zone %d", >> - zone); >> - } >> + VLOG_WARN("Request to create zone limit for invalid zone %d", >> + zone); >> + err = 1; >> } >> >> return err; >> @@ -561,47 +430,47 @@ zone_limit_update(struct conntrack *ct, > int32_t zone, uint32_t limit) >> >> static void >> conn_clean__(struct conntrack *ct, struct conn *conn) >> - OVS_REQUIRES(ct->ct_lock) >> { >> + uint16_t fwd_zone; >> + struct conntrack_zone *cz; >> uint32_t hash; >> >> if (conn->alg) { >> expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key); >> } >> >> + fwd_zone = conn->key_node[CT_DIR_FWD].key.zone; >> + cz = zone_lookup(ct, fwd_zone); >> hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis); >> - cmap_remove(&ct->conns[conn->key_node[CT_DIR_FWD].key.zone], >> + ovs_mutex_lock(&cz->zone_lock); >> + cmap_remove(&cz->conns, >> &conn->key_node[CT_DIR_FWD].cm_node, hash); >> + atomic_count_dec(&cz->count); >> >> if (conn->nat_action) { >> + ovs_assert(fwd_zone == conn->key_node[CT_DIR_REV].key.zone); >> hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key, >> ct->hash_basis); >> - cmap_remove(&ct->conns[conn->key_node[CT_DIR_REV].key.zone], >> + >> + cmap_remove(&cz->conns, >> &conn->key_node[CT_DIR_REV].cm_node, hash); >> } >> >> - rculist_remove(&conn->node); >> + ovs_mutex_unlock(&cz->zone_lock); >> } >> >> /* Also removes the associated nat 'conn' from the lookup >> datastructures. */ >> static void >> conn_clean(struct conntrack *ct, struct conn *conn) >> - OVS_EXCLUDED(conn->lock, ct->ct_lock) >> + OVS_EXCLUDED(conn->lock) >> { >> if (atomic_flag_test_and_set(&conn->reclaimed)) { >> return; >> } >> >> COVERAGE_INC(conntrack_remove); >> - ovs_mutex_lock(&ct->ct_lock); >> conn_clean__(ct, conn); >> - ovs_mutex_unlock(&ct->ct_lock); >> - >> - struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone); >> - if (zl && zl->czl.zone_limit_seq == conn->zone_limit_seq) { >> - atomic_count_dec(&zl->czl.count); >> - } >> >> ovsrcu_postpone(delete_conn, conn); >> atomic_count_dec(&ct->n_conn); >> @@ -619,26 +488,10 @@ conn_force_expire(struct conn *conn) >> void >> conntrack_destroy(struct conntrack *ct) >> { >> - struct conn *conn; >> - >> latch_set(&ct->clean_thread_exit); >> pthread_join(ct->clean_thread, NULL); >> latch_destroy(&ct->clean_thread_exit); >> >> - for (unsigned i = 0; i < N_EXP_LISTS; i++) { >> - RCULIST_FOR_EACH (conn, node, &ct->exp_lists[i]) { >> - conn_clean(ct, conn); >> - } >> - } >> - >> - struct zone_limit *zl; >> - CMAP_FOR_EACH (zl, node, &ct->zone_limits) { >> - uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis); >> - >> - cmap_remove(&ct->zone_limits, &zl->node, hash); >> - ovsrcu_postpone(free, zl); >> - } >> - >> struct timeout_policy *tp; >> CMAP_FOR_EACH (tp, node, &ct->timeout_policies) { >> uint32_t hash = hash_int(tp->policy.id, ct->hash_basis); >> @@ -649,14 +502,14 @@ conntrack_destroy(struct conntrack *ct) >> >> conntrack_flush(ct, NULL); >> >> - ovs_mutex_lock(&ct->ct_lock); >> - for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) { >> - cmap_destroy(&ct->conns[i]); >> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) { >> + ovs_mutex_lock(&ct->zones[i].zone_lock); >> + cmap_destroy(&ct->zones[i].conns); >> + ovs_mutex_unlock(&ct->zones[i].zone_lock); >> + ovs_mutex_destroy(&ct->zones[i].zone_lock); >> } >> - cmap_destroy(&ct->zone_limits); >> cmap_destroy(&ct->timeout_policies); >> >> - ovs_mutex_unlock(&ct->ct_lock); >> ovs_mutex_destroy(&ct->ct_lock); >> >> ovs_mutex_lock(&ct->resources_lock); >> @@ -672,7 +525,7 @@ conntrack_destroy(struct conntrack *ct) >> >> >> static bool >> -conn_key_lookup(struct conntrack *ct, const struct conn_key *key, >> +conn_key_lookup(struct conntrack_zone *cz, const struct conn_key *key, >> uint32_t hash, long long now, struct conn **conn_out, >> bool *reply) >> { >> @@ -680,7 +533,8 @@ conn_key_lookup(struct conntrack *ct, const > struct conn_key *key, >> struct conn *conn = NULL; >> bool found = false; >> >> - CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns[key->zone]) { >> + CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, >> + &cz->conns) { >> if (keyn->dir == CT_DIR_FWD) { >> conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); >> } else { >> @@ -712,12 +566,21 @@ out_found: >> return found; >> } >> >> +static bool >> +conn_lookup_zone(struct conntrack *ct, struct conntrack_zone *cz, >> + const struct conn_key *key, long long now, >> + struct conn **conn_out, bool *reply) >> +{ >> + uint32_t hash = conn_key_hash(key, ct->hash_basis); >> + return conn_key_lookup(cz, key, hash, now, conn_out, reply); >> +} >> + >> static bool >> conn_lookup(struct conntrack *ct, const struct conn_key *key, >> long long now, struct conn **conn_out, bool *reply) >> { >> - uint32_t hash = conn_key_hash(key, ct->hash_basis); >> - return conn_key_lookup(ct, key, hash, now, conn_out, reply); >> + struct conntrack_zone *cz = &ct->zones[key->zone]; >> + return conn_lookup_zone(ct, cz, key, now, conn_out, reply); >> } >> >> static void >> @@ -1026,27 +889,25 @@ ct_verify_helper(const char *helper, enum > ct_alg_ctl_type ct_alg_ctl) >> } >> >> static struct conn * >> -conn_insert(struct conntrack *ct, struct dp_packet *pkt, >> +conn_insert(struct conntrack *ct, struct conntrack_zone *cz, >> + struct dp_packet *pkt, >> struct conn_lookup_ctx *ctx, long long now, >> const struct nat_action_info_t *nat_action_info, >> const char *helper, const struct alg_exp_node *alg_exp, >> enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id) >> - OVS_REQUIRES(ct->ct_lock) >> + OVS_REQUIRES(cz->zone_lock) >> { >> COVERAGE_INC(conntrack_insert); >> struct conn *nc = NULL; >> >> int64_t czl_limit; >> struct conn_key_node *fwd_key_node, *rev_key_node; >> - struct zone_limit *zl = zone_limit_lookup_or_default(ct, >> - ctx->key.zone); >> - if (zl) { >> - czl_limit = zone_limit_get_limit(ct, &zl->czl); >> - if (czl_limit >= 0 && >> - atomic_count_get(&zl->czl.count) >= czl_limit) { >> - COVERAGE_INC(conntrack_zone_full); >> - return nc; >> - } >> + >> + czl_limit = zone_read_limit(cz); >> + if (czl_limit != CONN_LIMIT_NONE && >> + atomic_count_get(&cz->count) >= czl_limit) { >> + COVERAGE_INC(conntrack_zone_full); >> + return nc; > > Previously, limit=0 would delete the zone limit and reset to default. > But CONN_LIMIT_NONE is -1; and if someone changes the zone limit to '0' > they will max out all connections on the zone. > > I don't see that accounted for here. > >> } >> >> unsigned int n_conn_limit; >> @@ -1080,13 +941,6 @@ conn_insert(struct conntrack *ct, struct dp_packet >> *pkt, >> fwd_key_node->dir = CT_DIR_FWD; >> rev_key_node->dir = CT_DIR_REV; >> >> - if (zl) { >> - nc->admit_zone = zl->czl.zone; >> - nc->zone_limit_seq = zl->czl.zone_limit_seq; >> - } else { >> - nc->admit_zone = INVALID_ZONE; >> - } >> - >> if (nat_action_info) { >> nc->nat_action = nat_action_info->nat_action; >> >> @@ -1108,18 +962,14 @@ conn_insert(struct conntrack *ct, struct > dp_packet *pkt, >> nat_packet(pkt, nc, false, ctx->icmp_related); >> uint32_t rev_hash = conn_key_hash(&rev_key_node->key, >> ct->hash_basis); >> - cmap_insert(&ct->conns[ctx->key.zone], >> + cmap_insert(&ct->zones[ctx->key.zone].conns, >> &rev_key_node->cm_node, rev_hash); >> } >> >> - cmap_insert(&ct->conns[ctx->key.zone], >> + cmap_insert(&ct->zones[ctx->key.zone].conns, >> &fwd_key_node->cm_node, ctx->hash); >> - conn_expire_push_front(ct, nc); >> atomic_count_inc(&ct->n_conn); >> - >> - if (zl) { >> - atomic_count_inc(&zl->czl.count); >> - } >> + atomic_count_inc(&cz->count); >> >> ctx->conn = nc; /* For completeness. */ >> >> @@ -1165,6 +1015,7 @@ conn_maybe_not_found(struct conntrack *ct, > struct dp_packet *pkt, >> { >> COVERAGE_INC(conntrack_maybe_not_found); >> struct conn *nc = NULL; >> + struct conntrack_zone *cz = zone_lookup(ct, ctx->key.zone); >> >> /* Note that we only insert a connection if commit=true. In this >> * case we must ensure that the connection is not already part of >> @@ -1175,19 +1026,19 @@ conn_maybe_not_found(struct conntrack *ct, > struct dp_packet *pkt, >> * We do not use multiple small if's as this confused clangs locking >> * analysis. */ >> if (commit) { >> - ovs_mutex_lock(&ct->ct_lock); >> - bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL); >> + ovs_mutex_lock(&cz->zone_lock); >> + bool found = conn_lookup_zone(ct, cz, &ctx->key, now, NULL, NULL); >> if (!found) { >> if (!pkt_set_new_ct_state(pkt, ctx, alg_exp)) { >> - ovs_mutex_unlock(&ct->ct_lock); >> + ovs_mutex_unlock(&cz->zone_lock); >> return nc; >> } >> - nc = conn_insert(ct, pkt, ctx, now, nat_action_info, >> + nc = conn_insert(ct, cz, pkt, ctx, now, nat_action_info, >> helper, alg_exp, ct_alg_ctl, tp_id); >> } >> - ovs_mutex_unlock(&ct->ct_lock); >> + ovs_mutex_unlock(&cz->zone_lock); >> } else { >> - bool found = conn_lookup(ct, &ctx->key, now, NULL, NULL); >> + bool found = conn_lookup_zone(ct, cz, &ctx->key, now, NULL, NULL); >> if (!found) { >> pkt_set_new_ct_state(pkt, ctx, alg_exp); >> } >> @@ -1399,7 +1250,8 @@ initial_conn_lookup(struct conntrack *ct, > struct conn_lookup_ctx *ctx, >> conn_key_reverse(&ctx->key); >> } >> >> - conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); >> + struct conntrack_zone *cz = &ct->zones[ctx->key.zone]; >> + conn_key_lookup(cz, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); >> >> if (natted) { >> if (OVS_LIKELY(ctx->conn)) { >> @@ -1426,6 +1278,8 @@ process_one(struct conntrack *ct, struct dp_packet >> *pkt, >> const struct nat_action_info_t *nat_action_info, >> const char *helper, uint32_t tp_id) >> { >> + ovs_assert(ctx->key.zone == zone); >> + >> /* Reset ct_state whenever entering a new zone. */ >> if (pkt->md.ct_state && pkt->md.ct_zone != zone) { >> pkt->md.ct_state = 0; >> @@ -1630,28 +1484,34 @@ conntrack_get_sweep_interval(struct conntrack *ct) >> } >> >> static size_t >> -ct_sweep(struct conntrack *ct, struct rculist *list, long long now, >> - size_t *cleaned_count) >> +ct_sweep_zone(struct conntrack *ct, uint16_t zone, long long now, >> + size_t *cleaned_count) > > This used to be O(exp_list), but is now O(zone_conn_list). See above. > >> OVS_NO_THREAD_SAFETY_ANALYSIS >> { >> + struct conn_key_node *keyn; >> + struct conntrack_zone *cz; >> + unsigned int conn_count = 0; >> + unsigned int cleaned = 0; >> struct conn *conn; >> - size_t cleaned = 0; >> - size_t count = 0; >> + long long expiration; >> >> - RCULIST_FOR_EACH (conn, node, list) { >> - if (conn_expired(conn, now)) { >> - conn_clean(ct, conn); >> - cleaned++; >> + cz = zone_lookup(ct, zone); >> + CMAP_FOR_EACH (keyn, cm_node, &cz->conns) { >> + if (keyn->dir != CT_DIR_FWD) { >> + continue; >> } >> >> - count++; >> - } >> + conn = CONTAINER_OF(keyn, struct conn, key_node[keyn->dir]); >> + expiration = conn_expiration(conn); >> + if (now >= expiration) { >> + conn_clean(ct, conn); >> + cleaned++; >> + } > > ^ Spacing > >> >> - if (cleaned_count) { >> - *cleaned_count = cleaned; >> + conn_count++; >> } >> - >> - return count; >> + *cleaned_count = cleaned; >> + return conn_count; >> } >> >> /* Cleans up old connection entries from 'ct'. Returns the time >> @@ -1664,23 +1524,28 @@ conntrack_clean(struct conntrack *ct, long long now) >> unsigned int n_conn_limit, i; >> size_t clean_end, count = 0; >> size_t total_cleaned = 0; >> + uint16_t current_zone = ct->next_clean_zone; >> >> atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit); >> clean_end = n_conn_limit / 64; >> >> - for (i = ct->next_sweep; i < N_EXP_LISTS; i++) { >> - size_t cleaned; >> + for (i = 0; i < ARRAY_SIZE(ct->zones); i++) { >> + size_t cleaned = 0; >> >> if (count > clean_end) { >> next_wakeup = 0; >> break; >> } >> >> - count += ct_sweep(ct, &ct->exp_lists[i], now, &cleaned); >> + count += ct_sweep_zone(ct, current_zone, now, &cleaned); >> total_cleaned += cleaned; >> + >> + /* This will overflow and thereby allow us to iterate through all >> + * zones. */ >> + current_zone++; >> } >> >> - ct->next_sweep = (i < N_EXP_LISTS) ? i : 0; >> + ct->next_clean_zone = current_zone + 1; >> >> VLOG_DBG("conntrack cleaned %"PRIuSIZE" entries out of %"PRIuSIZE >> " entries in %lld msec", total_cleaned, count, >> @@ -2717,16 +2582,6 @@ conn_update(struct conntrack *ct, struct conn > *conn, struct dp_packet *pkt, >> return update_res; >> } >> >> -static void >> -conn_expire_push_front(struct conntrack *ct, struct conn *conn) >> - OVS_REQUIRES(ct->ct_lock) >> -{ >> - unsigned int curr = ct->next_list; >> - >> - ct->next_list = (ct->next_list + 1) % N_EXP_LISTS; >> - rculist_push_front(&ct->exp_lists[curr], &conn->node); >> -} >> - >> static long long int >> conn_expiration(const struct conn *conn) >> { >> @@ -2914,7 +2769,8 @@ conntrack_dump_start(struct conntrack *ct, > struct conntrack_dump *dump, >> >> dump->ct = ct; >> *ptot_bkts = 1; /* Need to clean up the callers. */ >> - dump->cursor = cmap_cursor_start(&dump->ct->conns[dump->current_zone]); >> + dump->cursor = cmap_cursor_start( >> + &dump->ct->zones[dump->current_zone].conns); >> return 0; >> } >> >> @@ -2945,7 +2801,8 @@ conntrack_dump_next(struct conntrack_dump > *dump, struct ct_dpif_entry *entry) >> break; >> } >> dump->current_zone++; >> - dump->cursor = > cmap_cursor_start(&dump->ct->conns[dump->current_zone]); >> + dump->cursor = cmap_cursor_start( >> + &dump->ct->zones[dump->current_zone].conns); >> } >> >> return EOF; >> @@ -3015,7 +2872,7 @@ conntrack_flush_zone(struct conntrack *ct, > const uint16_t zone) >> struct conn_key_node *keyn; >> struct conn *conn; >> >> - CMAP_FOR_EACH (keyn, cm_node, &ct->conns[zone]) { >> + CMAP_FOR_EACH (keyn, cm_node, &ct->zones[zone].conns) { >> if (keyn->dir != CT_DIR_FWD) { >> continue; >> } >> @@ -3033,7 +2890,7 @@ conntrack_flush(struct conntrack *ct, const > uint16_t *zone) >> return conntrack_flush_zone(ct, *zone); >> } >> >> - for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) { >> + for (unsigned i = 0; i < ARRAY_SIZE(ct->zones); i++) { >> conntrack_flush_zone(ct, i); >> } _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
