Am Fri, May 29, 2026 at 12:22:18PM -0400 schrieb Aaron Conole via dev:
> Felix Huettner via dev <[email protected]> writes:
> 
> > Previously we guarded expectations using an rwlock. While there are
> > generally only a few or none expectation entries, taking and releasing
> > the rwlock in read mode still takes more time than using a cmap.
> >
> > When doing a loadtest with OVN and natting across two LRs we increased
> > our packet rate from 2.5 Mio pps to 12 Mio pps with only 40% of the
> > previous load. This mainly comes from the UNSNAT and UNDNAT stages of OVN 
> > that
> > generally do not match anything as long as we are in not specific cases.
> >
> > Co-Authored-by: Florian Werner <[email protected]>
> > Signed-off-by: Florian Werner <[email protected]>
> > Co-Authored-by: Sebastian Riese <[email protected]>
> > Signed-off-by: Sebastian Riese <[email protected]>
> > Signed-off-by: Felix Huettner <[email protected]>
> > ---
> >  lib/conntrack-private.h |  9 ++---
> >  lib/conntrack.c         | 73 +++++++++++++++--------------------------
> >  lib/conntrack.h         |  5 +--
> >  3 files changed, 33 insertions(+), 54 deletions(-)
> >
> > diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> > index f1132e8aa..0141afd4a 100644
> > --- a/lib/conntrack-private.h
> > +++ b/lib/conntrack-private.h
> > @@ -75,7 +75,7 @@ BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct conn_key, 
> > nw_proto) == sizeof(uint8_t));
> >   * connection. */
> >  struct alg_exp_node {
> >      /* Node in alg_expectations. */
> > -    struct hmap_node node;
> > +    struct cmap_node node;
> >      /* Node in alg_expectation_refs. */
> >      struct hindex_node node_ref;
> >      /* Key of data connection to be created. */
> > @@ -231,9 +231,10 @@ struct conntrack {
> >  
> >      /* Expectations for application level gateways (created by control
> >       * connections to help create data connections, e.g. for FTP). */
> > -    struct ovs_rwlock resources_lock; /* Protects fields below. */
> > -    struct hmap alg_expectations OVS_GUARDED; /* Holds struct
> > -                                               * alg_exp_nodes. */
> > +    struct ovs_mutex resources_lock; /* Protects fields below. */
> > +    struct cmap alg_expectations; /* Holds struct alg_exp_nodes.
> > +                                   * resources_lock must be held for 
> > mutating
> > +                                   * operations. */
> >      struct hindex alg_expectation_refs OVS_GUARDED; /* For lookup from
> >                                                       * control context.  */
> >  
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index bcce71808..d96ac2c78 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -131,7 +131,7 @@ static inline bool
> >  extract_l3_ipv6(struct conn_key *key, const void *data, size_t size,
> >                  const char **new_data);
> >  static struct alg_exp_node *
> > -expectation_lookup(struct hmap *alg_expectations, const struct conn_key 
> > *key,
> > +expectation_lookup(struct cmap *alg_expectations, const struct conn_key 
> > *key,
> >                     uint32_t basis, bool src_ip_wc);
> >  
> >  static int
> > @@ -254,11 +254,11 @@ conntrack_init(void)
> >       */
> >      ct->hash_basis = random_uint32();
> >  
> > -    ovs_rwlock_init(&ct->resources_lock);
> > -    ovs_rwlock_wrlock(&ct->resources_lock);
> > -    hmap_init(&ct->alg_expectations);
> > +    ovs_mutex_init(&ct->resources_lock);
> > +    ovs_mutex_lock(&ct->resources_lock);
> > +    cmap_init(&ct->alg_expectations);
> >      hindex_init(&ct->alg_expectation_refs);
> > -    ovs_rwlock_unlock(&ct->resources_lock);
> > +    ovs_mutex_unlock(&ct->resources_lock);
> >  
> >      ovs_mutex_init_adaptive(&ct->ct_lock);
> >      ovs_mutex_lock(&ct->ct_lock);
> > @@ -652,15 +652,11 @@ conntrack_destroy(struct conntrack *ct)
> >      ovs_mutex_unlock(&ct->ct_lock);
> >      ovs_mutex_destroy(&ct->ct_lock);
> >  
> > -    ovs_rwlock_wrlock(&ct->resources_lock);
> > -    struct alg_exp_node *alg_exp_node;
> > -    HMAP_FOR_EACH_POP (alg_exp_node, node, &ct->alg_expectations) {
> > -        free(alg_exp_node);
> > -    }
> > -    hmap_destroy(&ct->alg_expectations);
> > +    ovs_mutex_lock(&ct->resources_lock);
> > +    cmap_destroy(&ct->alg_expectations);
> 
> NOTE above, you eliminate the expectations flushing code, but don't
> actually delete.  These are previously directly freed here, so I suggest
> keeping the direct free here.

Hi Aaron,

thanks for the review.
I added it in patch 4, but then i will just include this here.

> 
> >      hindex_destroy(&ct->alg_expectation_refs);
> > -    ovs_rwlock_unlock(&ct->resources_lock);
> > -    ovs_rwlock_destroy(&ct->resources_lock);
> > +    ovs_mutex_unlock(&ct->resources_lock);
> > +    ovs_mutex_destroy(&ct->resources_lock);
> >  
> >      ipf_destroy(ct->ipf);
> >      free(ct);
> > @@ -1473,7 +1469,6 @@ process_one(struct conntrack *ct, struct dp_packet 
> > *pkt,
> >  
> >      if (OVS_UNLIKELY(create_new_conn)) {
> >  
> > -        ovs_rwlock_rdlock(&ct->resources_lock);
> >          alg_exp = expectation_lookup(&ct->alg_expectations, &ctx->key,
> >                                       ct->hash_basis,
> >                                       alg_src_ip_wc(ct_alg_ctl));
> > @@ -1481,7 +1476,6 @@ process_one(struct conntrack *ct, struct dp_packet 
> > *pkt,
> >              memcpy(&alg_exp_entry, alg_exp, sizeof alg_exp_entry);
> >              alg_exp = &alg_exp_entry;
> >          }
> > -        ovs_rwlock_unlock(&ct->resources_lock);
> >  
> >          conn = conn_maybe_not_found(ct, pkt, ctx, commit, now, 
> > nat_action_info,
> >                                      helper, alg_exp, ct_alg_ctl, tp_id);
> > @@ -2979,6 +2973,7 @@ conntrack_exp_dump_start(struct conntrack *ct, struct 
> > conntrack_dump *dump,
> >      }
> >  
> >      dump->ct = ct;
> > +    dump->cursor = cmap_cursor_start(&dump->ct->alg_expectations);
> >  
> >      return 0;
> >  }
> > @@ -2986,31 +2981,16 @@ conntrack_exp_dump_start(struct conntrack *ct, 
> > struct conntrack_dump *dump,
> >  int
> >  conntrack_exp_dump_next(struct conntrack_dump *dump, struct ct_dpif_exp 
> > *entry)
> >  {
> > -    struct conntrack *ct = dump->ct;
> >      struct alg_exp_node *enode;
> > -    int ret = EOF;
> > -
> > -    ovs_rwlock_rdlock(&ct->resources_lock);
> > -
> > -    for (;;) {
> > -        struct hmap_node *node = hmap_at_position(&ct->alg_expectations,
> > -                                                  &dump->hmap_pos);
> > -        if (!node) {
> > -            break;
> > -        }
> > -
> > -        enode = CONTAINER_OF(node, struct alg_exp_node, node);
> >  
> > +    CMAP_CURSOR_FOR_EACH_CONTINUE (enode, node, &dump->cursor) {
> >          if (!dump->filter_zone || enode->key.zone == dump->zone) {
> > -            ret = 0;
> >              exp_node_to_ct_dpif_exp(enode, entry);
> > -            break;
> > +            return 0;
> >          }
> >      }
> >  
> > -    ovs_rwlock_unlock(&ct->resources_lock);
> > -
> > -    return ret;
> > +    return EOF;
> >  }
> >  
> >  int
> > @@ -3110,7 +3090,7 @@ conntrack_get_tcp_seq_chk(struct conntrack *ct)
> >  
> >  /* This function must be called with the ct->resources read lock taken. */
> 
> I guess this isn't a rw lock anymore.  Also is this comment a bit stale
> now, since readers shouldn't need a lock?

Thanks, removed.

> 
> >  static struct alg_exp_node *
> > -expectation_lookup(struct hmap *alg_expectations, const struct conn_key 
> > *key,
> > +expectation_lookup(struct cmap *alg_expectations, const struct conn_key 
> > *key,
> >                     uint32_t basis, bool src_ip_wc)
> >  {
> >      struct conn_key check_key;
> > @@ -3123,7 +3103,7 @@ expectation_lookup(struct hmap *alg_expectations, 
> > const struct conn_key *key,
> >  
> >      struct alg_exp_node *alg_exp_node;
> >  
> > -    HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node,
> > +    CMAP_FOR_EACH_WITH_HASH (alg_exp_node, node,
> >                               conn_key_hash(&check_key, basis),
> >                               alg_expectations) {
> >          if (!conn_key_cmp(&alg_exp_node->key, &check_key)) {
> > @@ -3135,15 +3115,16 @@ expectation_lookup(struct hmap *alg_expectations, 
> > const struct conn_key *key,
> >  
> >  /* This function must be called with the ct->resources write lock taken. */
> 
> This should just say lock, right?

fixed.

Thanks a lot,
Felix

> 
> >  static void
> > -expectation_remove(struct hmap *alg_expectations,
> > +expectation_remove(struct cmap *alg_expectations,
> >                     const struct conn_key *key, uint32_t basis)
> >  {
> >      struct alg_exp_node *alg_exp_node;
> >  
> > -    HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, conn_key_hash(key, basis),
> > +    uint32_t hash = conn_key_hash(key, basis);
> > +    CMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, hash,
> >                               alg_expectations) {
> >          if (!conn_key_cmp(&alg_exp_node->key, key)) {
> > -            hmap_remove(alg_expectations, &alg_exp_node->node);
> > +            cmap_remove(alg_expectations, &alg_exp_node->node, hash);
> >              break;
> >          }
> >      }
> > @@ -3186,7 +3167,7 @@ expectation_ref_create(struct hindex 
> > *alg_expectation_refs,
> >  static void
> >  expectation_clean(struct conntrack *ct, const struct conn_key *parent_key)
> >  {
> > -    ovs_rwlock_wrlock(&ct->resources_lock);
> > +    ovs_mutex_lock(&ct->resources_lock);
> >  
> >      struct alg_exp_node *node;
> >      HINDEX_FOR_EACH_WITH_HASH_SAFE (node, node_ref,
> > @@ -3196,11 +3177,11 @@ expectation_clean(struct conntrack *ct, const 
> > struct conn_key *parent_key)
> >              expectation_remove(&ct->alg_expectations, &node->key,
> >                                 ct->hash_basis);
> >              hindex_remove(&ct->alg_expectation_refs, &node->node_ref);
> > -            free(node);
> > +            ovsrcu_postpone(free, node);
> >          }
> >      }
> >  
> > -    ovs_rwlock_unlock(&ct->resources_lock);
> > +    ovs_mutex_unlock(&ct->resources_lock);
> >  }
> >  
> >  static void
> > @@ -3260,21 +3241,21 @@ expectation_create(struct conntrack *ct, ovs_be16 
> > dst_port,
> >      /* Take the write lock here because it is almost 100%
> >       * likely that the lookup will fail and
> >       * expectation_create() will be called below. */
> > -    ovs_rwlock_wrlock(&ct->resources_lock);
> > +    ovs_mutex_lock(&ct->resources_lock);
> >      struct alg_exp_node *alg_exp = expectation_lookup(
> >          &ct->alg_expectations, &alg_exp_node->key, ct->hash_basis, 
> > src_ip_wc);
> >      if (alg_exp) {
> > -        free(alg_exp_node);
> > -        ovs_rwlock_unlock(&ct->resources_lock);
> > +        ovsrcu_postpone(free, alg_exp_node);
> > +        ovs_mutex_unlock(&ct->resources_lock);
> >          return;
> >      }
> >  
> >      alg_exp_node->alg_nat_repl_addr = alg_nat_repl_addr;
> > -    hmap_insert(&ct->alg_expectations, &alg_exp_node->node,
> > +    cmap_insert(&ct->alg_expectations, &alg_exp_node->node,
> >                  conn_key_hash(&alg_exp_node->key, ct->hash_basis));
> >      expectation_ref_create(&ct->alg_expectation_refs, alg_exp_node,
> >                             ct->hash_basis);
> > -    ovs_rwlock_unlock(&ct->resources_lock);
> > +    ovs_mutex_unlock(&ct->resources_lock);
> >  }
> >  
> >  static void
> > diff --git a/lib/conntrack.h b/lib/conntrack.h
> > index c3136e955..abc859ef6 100644
> > --- a/lib/conntrack.h
> > +++ b/lib/conntrack.h
> > @@ -106,10 +106,7 @@ void conntrack_clear(struct dp_packet *packet);
> >  struct conntrack_dump {
> >      struct conntrack *ct;
> >      unsigned bucket;
> > -    union {
> > -        struct hmap_position hmap_pos;
> > -        struct cmap_cursor cursor;
> > -    };
> > +    struct cmap_cursor cursor;
> >      bool filter_zone;
> >      uint16_t zone;
> >      uint16_t current_zone;
> 
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to