Am Fri, Jun 26, 2026 at 05:59:12PM -0400 schrieb Aaron Conole via dev: > Felix Huettner via dev <[email protected]> writes: > > > Previously we guarded expectations using an rwlock. While there are > > generally only a few or none expectation entries, taking and releasing > > the rwlock in read mode still takes more time than using a cmap. > > > > When doing a loadtest with OVN and natting across two LRs we increased > > our packet rate from 2.5 Mio pps to 12 Mio pps with only 40% of the > > previous load. This mainly comes from the UNSNAT and UNDNAT stages of OVN > > that > > generally do not match anything as long as we are in not specific cases. > > > > When using the same testcases as in the previous commits > > Below lists the total time for the testrun with these various configs. > > | | Without this patch | With this patch | > > | Config 1 | 70.8 s | 70.5 s | > > | Config 2 | 73.1 s | 80.7 s | > > ^^ O-o - this config got 10% worse, which is significant. Config4 might > see a 10% improvement, but it seems weird to trade one config > improvement for the other. > > Also, does config2 even use expectations? > 10 Threads with 10k connections each. Connections in 100 different > zones. Each connection is sends 20 data packets and we run 100 > iterations. > > I think it's important to understand why this case got worse and > document it here.
So none of these configs use expectations. After some further testing it seems that i had some noise on the system i did the benchmarks one. I can no longer reproduce this regression. I would redo the benchmark on all commits to ensure we have reliable values and share then in the next version of the series. > > > | Config 3 | 68.1 s | 65.9 s | > > | Config 4 | 78.5 s | 71.4 s | > > | Config 5 | 45.5 s | 43.3 s | > > | Config 6 | 47.3 s | 46.6 s | > > > > Co-Authored-by: Florian Werner <[email protected]> > > Signed-off-by: Florian Werner <[email protected]> > > Co-Authored-by: Sebastian Riese <[email protected]> > > Signed-off-by: Sebastian Riese <[email protected]> > > Signed-off-by: Felix Huettner <[email protected]> > > --- > > lib/conntrack-private.h | 9 ++-- > > lib/conntrack.c | 94 ++++++++++++++++++++--------------------- > > lib/conntrack.h | 5 +-- > > 3 files changed, 51 insertions(+), 57 deletions(-) > > > > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h > > index f1132e8aa..0141afd4a 100644 > > --- a/lib/conntrack-private.h > > +++ b/lib/conntrack-private.h > > @@ -75,7 +75,7 @@ BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct conn_key, > > nw_proto) == sizeof(uint8_t)); > > * connection. */ > > struct alg_exp_node { > > /* Node in alg_expectations. */ > > - struct hmap_node node; > > + struct cmap_node node; > > /* Node in alg_expectation_refs. */ > > struct hindex_node node_ref; > > /* Key of data connection to be created. */ > > @@ -231,9 +231,10 @@ struct conntrack { > > > > /* Expectations for application level gateways (created by control > > * connections to help create data connections, e.g. for FTP). */ > > - struct ovs_rwlock resources_lock; /* Protects fields below. */ > > - struct hmap alg_expectations OVS_GUARDED; /* Holds struct > > - * alg_exp_nodes. */ > > + struct ovs_mutex resources_lock; /* Protects fields below. */ > > + struct cmap alg_expectations; /* Holds struct alg_exp_nodes. > > + * resources_lock must be held for > > mutating > > + * operations. */ > > struct hindex alg_expectation_refs OVS_GUARDED; /* For lookup from > > * control context. */ > > > > diff --git a/lib/conntrack.c b/lib/conntrack.c > > index 4661e803f..fd1be21df 100644 > > --- a/lib/conntrack.c > > +++ b/lib/conntrack.c > > @@ -131,7 +131,7 @@ static inline bool > > extract_l3_ipv6(struct conn_key *key, const void *data, size_t size, > > const char **new_data); > > static struct alg_exp_node * > > -expectation_lookup(struct hmap *alg_expectations, const struct conn_key > > *key, > > +expectation_lookup(struct cmap *alg_expectations, const struct conn_key > > *key, > > uint32_t basis, bool src_ip_wc); > > > > static int > > @@ -154,6 +154,8 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx, > > > > static void > > expectation_clean(struct conntrack *ct, const struct conn_key *parent_key); > > +static void > > +expectation_flush(struct conntrack *ct) OVS_REQUIRES(ct->resources_lock); > > > > static struct ct_l4_proto *l4_protos[UINT8_MAX + 1]; > > > > @@ -254,11 +256,11 @@ conntrack_init(void) > > */ > > ct->hash_basis = random_uint32(); > > > > - ovs_rwlock_init(&ct->resources_lock); > > - ovs_rwlock_wrlock(&ct->resources_lock); > > - hmap_init(&ct->alg_expectations); > > + ovs_mutex_init(&ct->resources_lock); > > + ovs_mutex_lock(&ct->resources_lock); > > + cmap_init(&ct->alg_expectations); > > hindex_init(&ct->alg_expectation_refs); > > - ovs_rwlock_unlock(&ct->resources_lock); > > + ovs_mutex_unlock(&ct->resources_lock); > > > > ovs_mutex_init_adaptive(&ct->ct_lock); > > ovs_mutex_lock(&ct->ct_lock); > > @@ -641,8 +643,9 @@ conntrack_destroy(struct conntrack *ct) > > ovsrcu_postpone(free, tp); > > } > > > > - ovs_mutex_lock(&ct->ct_lock); > > + conntrack_flush(ct, NULL); > > This call to conntrack_flush() is weird to see. We already flush all > the connections, right? Actually, reading closer, maybe we aren't doing > that on destroy. I think this should be a separate patch adding > conntrack_flush and it is independent of this series. This is actually missatributed and should be part of the "conntrack: Split lock and limits per zone.". Until then we clean the connections based on the exp_lists. Will fix that in the next version. > > > + ovs_mutex_lock(&ct->ct_lock); > > for (unsigned i = 0; i < ARRAY_SIZE(ct->conns); i++) { > > cmap_destroy(&ct->conns[i]); > > } > > @@ -652,15 +655,12 @@ conntrack_destroy(struct conntrack *ct) > > ovs_mutex_unlock(&ct->ct_lock); > > ovs_mutex_destroy(&ct->ct_lock); > > > > - ovs_rwlock_wrlock(&ct->resources_lock); > > - struct alg_exp_node *alg_exp_node; > > - HMAP_FOR_EACH_POP (alg_exp_node, node, &ct->alg_expectations) { > > - free(alg_exp_node); > > - } > > - hmap_destroy(&ct->alg_expectations); > > + ovs_mutex_lock(&ct->resources_lock); > > + expectation_flush(ct); > > + cmap_destroy(&ct->alg_expectations); > > hindex_destroy(&ct->alg_expectation_refs); > > - ovs_rwlock_unlock(&ct->resources_lock); > > - ovs_rwlock_destroy(&ct->resources_lock); > > + ovs_mutex_unlock(&ct->resources_lock); > > + ovs_mutex_destroy(&ct->resources_lock); > > > > ipf_destroy(ct->ipf); > > free(ct); > > @@ -1473,7 +1473,6 @@ process_one(struct conntrack *ct, struct dp_packet > > *pkt, > > > > if (OVS_UNLIKELY(create_new_conn)) { > > > > - ovs_rwlock_rdlock(&ct->resources_lock); > > alg_exp = expectation_lookup(&ct->alg_expectations, &ctx->key, > > ct->hash_basis, > > alg_src_ip_wc(ct_alg_ctl)); > > @@ -1481,7 +1480,6 @@ process_one(struct conntrack *ct, struct dp_packet > > *pkt, > > memcpy(&alg_exp_entry, alg_exp, sizeof alg_exp_entry); > > alg_exp = &alg_exp_entry; > > } > > - ovs_rwlock_unlock(&ct->resources_lock); > > > > conn = conn_maybe_not_found(ct, pkt, ctx, commit, now, > > nat_action_info, > > helper, alg_exp, ct_alg_ctl, tp_id); > > @@ -2979,6 +2977,7 @@ conntrack_exp_dump_start(struct conntrack *ct, struct > > conntrack_dump *dump, > > } > > > > dump->ct = ct; > > + dump->cursor = cmap_cursor_start(&dump->ct->alg_expectations); > > > > return 0; > > } > > @@ -2986,31 +2985,16 @@ conntrack_exp_dump_start(struct conntrack *ct, > > struct conntrack_dump *dump, > > int > > conntrack_exp_dump_next(struct conntrack_dump *dump, struct ct_dpif_exp > > *entry) > > { > > - struct conntrack *ct = dump->ct; > > struct alg_exp_node *enode; > > - int ret = EOF; > > - > > - ovs_rwlock_rdlock(&ct->resources_lock); > > - > > - for (;;) { > > - struct hmap_node *node = hmap_at_position(&ct->alg_expectations, > > - &dump->hmap_pos); > > - if (!node) { > > - break; > > - } > > - > > - enode = CONTAINER_OF(node, struct alg_exp_node, node); > > > > + CMAP_CURSOR_FOR_EACH_CONTINUE (enode, node, &dump->cursor) { > > if (!dump->filter_zone || enode->key.zone == dump->zone) { > > - ret = 0; > > exp_node_to_ct_dpif_exp(enode, entry); > > - break; > > + return 0; > > } > > } > > > > - ovs_rwlock_unlock(&ct->resources_lock); > > - > > - return ret; > > + return EOF; > > } > > > > int > > @@ -3108,9 +3092,8 @@ conntrack_get_tcp_seq_chk(struct conntrack *ct) > > return enabled; > > } > > > > -/* This function must be called with the ct->resources read lock taken. */ > > static struct alg_exp_node * > > -expectation_lookup(struct hmap *alg_expectations, const struct conn_key > > *key, > > +expectation_lookup(struct cmap *alg_expectations, const struct conn_key > > *key, > > uint32_t basis, bool src_ip_wc) > > { > > struct conn_key check_key; > > @@ -3123,7 +3106,7 @@ expectation_lookup(struct hmap *alg_expectations, > > const struct conn_key *key, > > > > struct alg_exp_node *alg_exp_node; > > > > - HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, > > + CMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, > > conn_key_hash(&check_key, basis), > > alg_expectations) { > > if (!conn_key_cmp(&alg_exp_node->key, &check_key)) { > > @@ -3133,17 +3116,18 @@ expectation_lookup(struct hmap *alg_expectations, > > const struct conn_key *key, > > return NULL; > > } > > > > -/* This function must be called with the ct->resources write lock taken. */ > > +/* This function must be called with the ct->resources_lock taken. */ > > static void > > -expectation_remove(struct hmap *alg_expectations, > > +expectation_remove(struct cmap *alg_expectations, > > const struct conn_key *key, uint32_t basis) > > Is there a way to add the OVS_REQUIRES here? That would help with > lockdep analysis. It's okay if not. I think that should work. will add it in the next version. > > > { > > struct alg_exp_node *alg_exp_node; > > > > - HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, conn_key_hash(key, basis), > > + uint32_t hash = conn_key_hash(key, basis); > > + CMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, hash, > > alg_expectations) { > > if (!conn_key_cmp(&alg_exp_node->key, key)) { > > - hmap_remove(alg_expectations, &alg_exp_node->node); > > + cmap_remove(alg_expectations, &alg_exp_node->node, hash); > > break; > > } > > } > > @@ -3183,10 +3167,22 @@ expectation_ref_create(struct hindex > > *alg_expectation_refs, > > } > > } > > > > +static void > > +expectation_flush(struct conntrack *ct) > > +{ > > + struct alg_exp_node *node; > > + HINDEX_FOR_EACH_SAFE (node, node_ref, &ct->alg_expectation_refs) { > > + expectation_remove(&ct->alg_expectations, &node->key, > > + ct->hash_basis); > > + hindex_remove(&ct->alg_expectation_refs, &node->node_ref); > > + ovsrcu_postpone(free, node); > > + } > > +} > > + > > static void > > expectation_clean(struct conntrack *ct, const struct conn_key *parent_key) > > { > > - ovs_rwlock_wrlock(&ct->resources_lock); > > + ovs_mutex_lock(&ct->resources_lock); > > > > struct alg_exp_node *node; > > HINDEX_FOR_EACH_WITH_HASH_SAFE (node, node_ref, > > @@ -3196,11 +3192,11 @@ expectation_clean(struct conntrack *ct, const > > struct conn_key *parent_key) > > expectation_remove(&ct->alg_expectations, &node->key, > > ct->hash_basis); > > hindex_remove(&ct->alg_expectation_refs, &node->node_ref); > > - free(node); > > + ovsrcu_postpone(free, node); > > } > > } > > > > - ovs_rwlock_unlock(&ct->resources_lock); > > + ovs_mutex_unlock(&ct->resources_lock); > > } > > > > static void > > @@ -3260,21 +3256,21 @@ expectation_create(struct conntrack *ct, ovs_be16 > > dst_port, > > /* Take the write lock here because it is almost 100% > > * likely that the lookup will fail and > > * expectation_create() will be called below. */ > > - ovs_rwlock_wrlock(&ct->resources_lock); > > + ovs_mutex_lock(&ct->resources_lock); > > struct alg_exp_node *alg_exp = expectation_lookup( > > &ct->alg_expectations, &alg_exp_node->key, ct->hash_basis, > > src_ip_wc); > > if (alg_exp) { > > - free(alg_exp_node); > > - ovs_rwlock_unlock(&ct->resources_lock); > > + ovsrcu_postpone(free, alg_exp_node); > > This call appears at the top of the function: > > struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node); > > and at this current point, we haven't inserted the new variable into the > cmap. I think calling out to rcu is inappropriate here. thanks, fixed Thanks for all the comments, Felix > > > + ovs_mutex_unlock(&ct->resources_lock); > > return; > > } > > > > alg_exp_node->alg_nat_repl_addr = alg_nat_repl_addr; > > - hmap_insert(&ct->alg_expectations, &alg_exp_node->node, > > + cmap_insert(&ct->alg_expectations, &alg_exp_node->node, > > conn_key_hash(&alg_exp_node->key, ct->hash_basis)); > > expectation_ref_create(&ct->alg_expectation_refs, alg_exp_node, > > ct->hash_basis); > > - ovs_rwlock_unlock(&ct->resources_lock); > > + ovs_mutex_unlock(&ct->resources_lock); > > } > > > > static void > > diff --git a/lib/conntrack.h b/lib/conntrack.h > > index c3136e955..abc859ef6 100644 > > --- a/lib/conntrack.h > > +++ b/lib/conntrack.h > > @@ -106,10 +106,7 @@ void conntrack_clear(struct dp_packet *packet); > > struct conntrack_dump { > > struct conntrack *ct; > > unsigned bucket; > > - union { > > - struct hmap_position hmap_pos; > > - struct cmap_cursor cursor; > > - }; > > + struct cmap_cursor cursor; > > bool filter_zone; > > uint16_t zone; > > uint16_t current_zone; > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
