Previously we guarded expectations using an rwlock. While there are
generally only a few or none expectation entries, taking and releasing
the rwlock in read mode still takes more time than using a cmap.

When doing a loadtest with OVN and natting across two LRs we increased
our packet rate from 2.5 Mio pps to 12 Mio pps with only 40% of the
previous load. This mainly comes from the UNSNAT and UNDNAT stages of OVN that
generally do not match anything as long as we are in not specific cases.

Co-Authored-by: Florian Werner <[email protected]>
Signed-off-by: Florian Werner <[email protected]>
Co-Authored-by: Sebastian Riese <[email protected]>
Signed-off-by: Sebastian Riese <[email protected]>
Signed-off-by: Felix Huettner <[email protected]>
---
 lib/conntrack-private.h |  9 ++---
 lib/conntrack.c         | 73 +++++++++++++++--------------------------
 lib/conntrack.h         |  5 +--
 3 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index f1132e8aa..0141afd4a 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -75,7 +75,7 @@ BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct conn_key, nw_proto) == 
sizeof(uint8_t));
  * connection. */
 struct alg_exp_node {
     /* Node in alg_expectations. */
-    struct hmap_node node;
+    struct cmap_node node;
     /* Node in alg_expectation_refs. */
     struct hindex_node node_ref;
     /* Key of data connection to be created. */
@@ -231,9 +231,10 @@ struct conntrack {
 
     /* Expectations for application level gateways (created by control
      * connections to help create data connections, e.g. for FTP). */
-    struct ovs_rwlock resources_lock; /* Protects fields below. */
-    struct hmap alg_expectations OVS_GUARDED; /* Holds struct
-                                               * alg_exp_nodes. */
+    struct ovs_mutex resources_lock; /* Protects fields below. */
+    struct cmap alg_expectations; /* Holds struct alg_exp_nodes.
+                                   * resources_lock must be held for mutating
+                                   * operations. */
     struct hindex alg_expectation_refs OVS_GUARDED; /* For lookup from
                                                      * control context.  */
 
diff --git a/lib/conntrack.c b/lib/conntrack.c
index bcce71808..d96ac2c78 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -131,7 +131,7 @@ static inline bool
 extract_l3_ipv6(struct conn_key *key, const void *data, size_t size,
                 const char **new_data);
 static struct alg_exp_node *
-expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
+expectation_lookup(struct cmap *alg_expectations, const struct conn_key *key,
                    uint32_t basis, bool src_ip_wc);
 
 static int
@@ -254,11 +254,11 @@ conntrack_init(void)
      */
     ct->hash_basis = random_uint32();
 
-    ovs_rwlock_init(&ct->resources_lock);
-    ovs_rwlock_wrlock(&ct->resources_lock);
-    hmap_init(&ct->alg_expectations);
+    ovs_mutex_init(&ct->resources_lock);
+    ovs_mutex_lock(&ct->resources_lock);
+    cmap_init(&ct->alg_expectations);
     hindex_init(&ct->alg_expectation_refs);
-    ovs_rwlock_unlock(&ct->resources_lock);
+    ovs_mutex_unlock(&ct->resources_lock);
 
     ovs_mutex_init_adaptive(&ct->ct_lock);
     ovs_mutex_lock(&ct->ct_lock);
@@ -652,15 +652,11 @@ conntrack_destroy(struct conntrack *ct)
     ovs_mutex_unlock(&ct->ct_lock);
     ovs_mutex_destroy(&ct->ct_lock);
 
-    ovs_rwlock_wrlock(&ct->resources_lock);
-    struct alg_exp_node *alg_exp_node;
-    HMAP_FOR_EACH_POP (alg_exp_node, node, &ct->alg_expectations) {
-        free(alg_exp_node);
-    }
-    hmap_destroy(&ct->alg_expectations);
+    ovs_mutex_lock(&ct->resources_lock);
+    cmap_destroy(&ct->alg_expectations);
     hindex_destroy(&ct->alg_expectation_refs);
-    ovs_rwlock_unlock(&ct->resources_lock);
-    ovs_rwlock_destroy(&ct->resources_lock);
+    ovs_mutex_unlock(&ct->resources_lock);
+    ovs_mutex_destroy(&ct->resources_lock);
 
     ipf_destroy(ct->ipf);
     free(ct);
@@ -1473,7 +1469,6 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
 
     if (OVS_UNLIKELY(create_new_conn)) {
 
-        ovs_rwlock_rdlock(&ct->resources_lock);
         alg_exp = expectation_lookup(&ct->alg_expectations, &ctx->key,
                                      ct->hash_basis,
                                      alg_src_ip_wc(ct_alg_ctl));
@@ -1481,7 +1476,6 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
             memcpy(&alg_exp_entry, alg_exp, sizeof alg_exp_entry);
             alg_exp = &alg_exp_entry;
         }
-        ovs_rwlock_unlock(&ct->resources_lock);
 
         conn = conn_maybe_not_found(ct, pkt, ctx, commit, now, nat_action_info,
                                     helper, alg_exp, ct_alg_ctl, tp_id);
@@ -2979,6 +2973,7 @@ conntrack_exp_dump_start(struct conntrack *ct, struct 
conntrack_dump *dump,
     }
 
     dump->ct = ct;
+    dump->cursor = cmap_cursor_start(&dump->ct->alg_expectations);
 
     return 0;
 }
@@ -2986,31 +2981,16 @@ conntrack_exp_dump_start(struct conntrack *ct, struct 
conntrack_dump *dump,
 int
 conntrack_exp_dump_next(struct conntrack_dump *dump, struct ct_dpif_exp *entry)
 {
-    struct conntrack *ct = dump->ct;
     struct alg_exp_node *enode;
-    int ret = EOF;
-
-    ovs_rwlock_rdlock(&ct->resources_lock);
-
-    for (;;) {
-        struct hmap_node *node = hmap_at_position(&ct->alg_expectations,
-                                                  &dump->hmap_pos);
-        if (!node) {
-            break;
-        }
-
-        enode = CONTAINER_OF(node, struct alg_exp_node, node);
 
+    CMAP_CURSOR_FOR_EACH_CONTINUE (enode, node, &dump->cursor) {
         if (!dump->filter_zone || enode->key.zone == dump->zone) {
-            ret = 0;
             exp_node_to_ct_dpif_exp(enode, entry);
-            break;
+            return 0;
         }
     }
 
-    ovs_rwlock_unlock(&ct->resources_lock);
-
-    return ret;
+    return EOF;
 }
 
 int
@@ -3110,7 +3090,7 @@ conntrack_get_tcp_seq_chk(struct conntrack *ct)
 
 /* This function must be called with the ct->resources read lock taken. */
 static struct alg_exp_node *
-expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
+expectation_lookup(struct cmap *alg_expectations, const struct conn_key *key,
                    uint32_t basis, bool src_ip_wc)
 {
     struct conn_key check_key;
@@ -3123,7 +3103,7 @@ expectation_lookup(struct hmap *alg_expectations, const 
struct conn_key *key,
 
     struct alg_exp_node *alg_exp_node;
 
-    HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node,
+    CMAP_FOR_EACH_WITH_HASH (alg_exp_node, node,
                              conn_key_hash(&check_key, basis),
                              alg_expectations) {
         if (!conn_key_cmp(&alg_exp_node->key, &check_key)) {
@@ -3135,15 +3115,16 @@ expectation_lookup(struct hmap *alg_expectations, const 
struct conn_key *key,
 
 /* This function must be called with the ct->resources write lock taken. */
 static void
-expectation_remove(struct hmap *alg_expectations,
+expectation_remove(struct cmap *alg_expectations,
                    const struct conn_key *key, uint32_t basis)
 {
     struct alg_exp_node *alg_exp_node;
 
-    HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, conn_key_hash(key, basis),
+    uint32_t hash = conn_key_hash(key, basis);
+    CMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, hash,
                              alg_expectations) {
         if (!conn_key_cmp(&alg_exp_node->key, key)) {
-            hmap_remove(alg_expectations, &alg_exp_node->node);
+            cmap_remove(alg_expectations, &alg_exp_node->node, hash);
             break;
         }
     }
@@ -3186,7 +3167,7 @@ expectation_ref_create(struct hindex 
*alg_expectation_refs,
 static void
 expectation_clean(struct conntrack *ct, const struct conn_key *parent_key)
 {
-    ovs_rwlock_wrlock(&ct->resources_lock);
+    ovs_mutex_lock(&ct->resources_lock);
 
     struct alg_exp_node *node;
     HINDEX_FOR_EACH_WITH_HASH_SAFE (node, node_ref,
@@ -3196,11 +3177,11 @@ expectation_clean(struct conntrack *ct, const struct 
conn_key *parent_key)
             expectation_remove(&ct->alg_expectations, &node->key,
                                ct->hash_basis);
             hindex_remove(&ct->alg_expectation_refs, &node->node_ref);
-            free(node);
+            ovsrcu_postpone(free, node);
         }
     }
 
-    ovs_rwlock_unlock(&ct->resources_lock);
+    ovs_mutex_unlock(&ct->resources_lock);
 }
 
 static void
@@ -3260,21 +3241,21 @@ expectation_create(struct conntrack *ct, ovs_be16 
dst_port,
     /* Take the write lock here because it is almost 100%
      * likely that the lookup will fail and
      * expectation_create() will be called below. */
-    ovs_rwlock_wrlock(&ct->resources_lock);
+    ovs_mutex_lock(&ct->resources_lock);
     struct alg_exp_node *alg_exp = expectation_lookup(
         &ct->alg_expectations, &alg_exp_node->key, ct->hash_basis, src_ip_wc);
     if (alg_exp) {
-        free(alg_exp_node);
-        ovs_rwlock_unlock(&ct->resources_lock);
+        ovsrcu_postpone(free, alg_exp_node);
+        ovs_mutex_unlock(&ct->resources_lock);
         return;
     }
 
     alg_exp_node->alg_nat_repl_addr = alg_nat_repl_addr;
-    hmap_insert(&ct->alg_expectations, &alg_exp_node->node,
+    cmap_insert(&ct->alg_expectations, &alg_exp_node->node,
                 conn_key_hash(&alg_exp_node->key, ct->hash_basis));
     expectation_ref_create(&ct->alg_expectation_refs, alg_exp_node,
                            ct->hash_basis);
-    ovs_rwlock_unlock(&ct->resources_lock);
+    ovs_mutex_unlock(&ct->resources_lock);
 }
 
 static void
diff --git a/lib/conntrack.h b/lib/conntrack.h
index c3136e955..abc859ef6 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -106,10 +106,7 @@ void conntrack_clear(struct dp_packet *packet);
 struct conntrack_dump {
     struct conntrack *ct;
     unsigned bucket;
-    union {
-        struct hmap_position hmap_pos;
-        struct cmap_cursor cursor;
-    };
+    struct cmap_cursor cursor;
     bool filter_zone;
     uint16_t zone;
     uint16_t current_zone;
-- 
2.43.0



   
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to