Hi Solomon

There was a typo in nat_clean(...)

     ct_rwlock_unlock(&ct->resources_lock);
     ct_lock_unlock(&ctb->lock);
     uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
-    unsigned bucket_rev_conn =
-        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
+    unsigned bucket_rev_conn = hash_to_bucket(hash);
     ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
     ct_rwlock_wrlock(&ct->resources_lock);

I also cleaned up the patch a little more and ended up with:

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 5410ab4..68d9816 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
conn_key *key2)
     return 1;
 }

+static bool
+conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
+                 const struct conn_key *key)
+    OVS_REQUIRES(ctb->lock)
+{
+    struct conn *conn;
+    uint32_t hash = conn_key_hash(key, ct->hash_basis);
+
+    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+        if (!conn_key_cmp(&conn->key, key)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static void
 ct_print_conn_info(const struct conn *c, const char *log_msg,
                    enum vlog_level vll, bool force, bool rl_on)
@@ -738,29 +754,53 @@ un_nat_packet(struct dp_packet *pkt, const struct
conn *conn,
     }
 }

-/* Typical usage of this helper is in non per-packet code;
- * this is because the bucket lock needs to be held for lookup
- * and a hash would have already been needed. Hence, this function
- * is just intended for code clarity. */
+/* This function is called with the bucket lock held. */
 static struct conn *
-conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
now)
+conn_lookup_any(const struct conn_key *key,
+                const struct conntrack_bucket *ctb, uint32_t hash)
 {
-    struct conn_lookup_ctx ctx;
-    ctx.conn = NULL;
-    ctx.key = *key;
-    ctx.hash = conn_key_hash(key, ct->hash_basis);
-    unsigned bucket = hash_to_bucket(ctx.hash);
-    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
-    return ctx.conn;
+    struct conn *conn = NULL;
+
+    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+        if (!conn_key_cmp(&conn->key, key)) {
+            break;
+        }
+        if (!conn_key_cmp(&conn->rev_key, key)) {
+            break;
+        }
+    }
+    return conn;
+}
+
+/* This function is called with the bucket lock held. */
+static struct conn *
+conn_lookup_unnat(const struct conn_key *key,
+                  const struct conntrack_bucket *ctb, uint32_t hash)
+{
+    struct conn *conn = NULL;
+
+    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+        if (!conn_key_cmp(&conn->key, key)
+            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
+            break;
+        }
+    }
+    return conn;
 }

 static void
 conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
                   long long now, int seq_skew, bool seq_skew_dir)
 {
-    unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
+    uint32_t hash = conn_key_hash(key, ct->hash_basis);
+    unsigned bucket = hash_to_bucket(hash);
     ct_lock_lock(&ct->buckets[bucket].lock);
-    struct conn *conn = conn_lookup(ct, key, now);
+    struct conn_lookup_ctx ctx;
+    ctx.key = *key;
+    ctx.hash = hash;
+    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
+    struct conn *conn = ctx.conn;
+
     if (conn && seq_skew) {
         conn->seq_skew = seq_skew;
         conn->seq_skew_dir = seq_skew_dir;
@@ -777,12 +817,13 @@ nat_clean(struct conntrack *ct, struct conn *conn,
     nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
ct->hash_basis);
     ct_rwlock_unlock(&ct->resources_lock);
     ct_lock_unlock(&ctb->lock);
-    unsigned bucket_rev_conn =
-        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
+    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
+    unsigned bucket_rev_conn = hash_to_bucket(hash);
     ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
     ct_rwlock_wrlock(&ct->resources_lock);
-    long long now = time_msec();
-    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
+    struct conn *rev_conn = conn_lookup_unnat(&conn->rev_key,
+
&ct->buckets[bucket_rev_conn],
+                                              hash);
     struct nat_conn_key_node *nat_conn_key_node =
         nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
                              ct->hash_basis);
@@ -812,7 +853,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
         expectation_clean(ct, &conn->key, ct->hash_basis);
     }
     ovs_list_remove(&conn->exp_node);
-    hmap_remove(&ctb->connections, &conn->node);
+
+    /* Temporary debug check. */
+    if (conn_key_present(ct, ctb, &conn->key)) {
+        hmap_remove(&ctb->connections, &conn->node);
+    } else {
+        char *log_msg = xasprintf("conn_clean: conn not present in hmap");
+        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
+        free(log_msg);
+    }
+
     atomic_count_dec(&ct->n_conn);
     if (conn->nat_info) {
         nat_clean(ct, conn, ctb);
@@ -1005,7 +1055,7 @@ conn_update_state(struct conntrack *ct, struct
dp_packet *pkt,

 static void
 create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
-                   long long now, bool alg_un_nat)
+                   bool alg_un_nat)
 {
     struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
     nc->key = conn_for_un_nat_copy->rev_key;
@@ -1013,7 +1063,9 @@ create_un_nat_conn(struct conntrack *ct, struct conn
*conn_for_un_nat_copy,
     uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
     unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
     ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
-    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
+    struct conn *rev_conn = conn_lookup_any(&nc->key,
+
&ct->buckets[un_nat_conn_bucket],
+                                            un_nat_hash);

     if (alg_un_nat) {
         if (!rev_conn) {
@@ -1022,7 +1074,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
*conn_for_un_nat_copy,
         } else {
             char *log_msg = xasprintf("Unusual condition for un_nat conn "
                                       "create for alg: rev_conn %p",
rev_conn);
-            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
             free(log_msg);
             free(nc);
         }
@@ -1030,16 +1082,26 @@ create_un_nat_conn(struct conntrack *ct, struct
conn *conn_for_un_nat_copy,
         ct_rwlock_rdlock(&ct->resources_lock);

         struct nat_conn_key_node *nat_conn_key_node =
-            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
ct->hash_basis);
+           nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
ct->hash_basis);
         if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
-            &nc->rev_key) && !rev_conn) {
-            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
-                        &nc->node, un_nat_hash);
+                                               &nc->rev_key)) {
+            if (!rev_conn) {
+                hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
+                            &nc->node, un_nat_hash);
+            } else {
+                char *log_msg = xasprintf("NAT conflict; un-nat will not"
+                                          "happen; likely DOS");
+                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
+                free(log_msg);
+                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
+                                     ct->hash_basis);
+                free(nc);
+            }
         } else {
             char *log_msg = xasprintf("Unusual condition for un_nat conn "
                                       "create: nat_conn_key_node/rev_conn "
-                                      "%p/%p", nat_conn_key_node,
rev_conn);
-            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+                                      "%p", nat_conn_key_node);
+            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
             free(log_msg);
             free(nc);
         }
@@ -1286,7 +1348,7 @@ process_one(struct conntrack *ct, struct dp_packet
*pkt,
     ct_lock_unlock(&ct->buckets[bucket].lock);

     if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
-        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
+        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
     }

     handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
@@ -1383,19 +1445,18 @@ sweep_bucket(struct conntrack *ct, struct
conntrack_bucket *ctb,

     for (unsigned i = 0; i < N_CT_TM; i++) {
         LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
-            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
-                if (!conn_expired(conn, now) || count >= limit) {
-                    min_expiration = MIN(min_expiration, conn->expiration);
-                    if (count >= limit) {
-                        /* Do not check other lists. */
-                        COVERAGE_INC(conntrack_long_cleanup);
-                        return min_expiration;
-                    }
-                    break;
+            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+            if (!conn_expired(conn, now) || count >= limit) {
+                min_expiration = MIN(min_expiration, conn->expiration);
+                if (count >= limit) {
+                    /* Do not check other lists. */
+                    COVERAGE_INC(conntrack_long_cleanup);
+                    return min_expiration;
                 }
-                conn_clean(ct, conn, ctb);
-                count++;
+                break;
             }
+            conn_clean(ct, conn, ctb);
+            count++;
         }
     }
     return min_expiration;
@@ -2540,16 +2601,18 @@ int
 conntrack_flush(struct conntrack *ct, const uint16_t *zone)
 {
     for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
-        struct conn *conn, *next;
-
-        ct_lock_lock(&ct->buckets[i].lock);
-        HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections)
{
-            if ((!zone || *zone == conn->key.zone) &&
-                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
-                conn_clean(ct, conn, &ct->buckets[i]);
+        struct conntrack_bucket *ctb = &ct->buckets[i];
+        ct_lock_lock(&ctb->lock);
+        for (unsigned j = 0; j < N_CT_TM; j++) {
+            struct conn *conn, *next;
+            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
+                if (!zone || *zone == conn->key.zone) {
+                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+                    conn_clean(ct, conn, ctb);
+                }
             }
         }
-        ct_lock_unlock(&ct->buckets[i].lock);
+        ct_lock_unlock(&ctb->lock);
     }

     return 0;
(END)




On Fri, Mar 8, 2019 at 8:07 AM Darrell Ball <[email protected]> wrote:

> Thanks for confirming Solomon; can you check the following patch on your
> side as well ?
>
> Darrell
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 5410ab4..c6b06d6 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
> struct conn_key *key2)
>      return 1;
>  }
>
> +static bool
> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> +                 const struct conn_key *key)
> +    OVS_REQUIRES(ctb->lock)
> +{
> +    struct conn *conn;
> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
>  static void
>  ct_print_conn_info(const struct conn *c, const char *log_msg,
>                     enum vlog_level vll, bool force, bool rl_on)
> @@ -738,29 +754,55 @@ un_nat_packet(struct dp_packet *pkt, const struct
> conn *conn,
>      }
>  }
>
> -/* Typical usage of this helper is in non per-packet code;
> - * this is because the bucket lock needs to be held for lookup
> - * and a hash would have already been needed. Hence, this function
> - * is just intended for code clarity. */
>  static struct conn *
> -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
> now)
> +conn_lookup_any(struct conntrack *ct, const struct conn_key *key,
> +                uint32_t hash)
>  {
> -    struct conn_lookup_ctx ctx;
> -    ctx.conn = NULL;
> -    ctx.key = *key;
> -    ctx.hash = conn_key_hash(key, ct->hash_basis);
> -    unsigned bucket = hash_to_bucket(ctx.hash);
> -    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> -    return ctx.conn;
> +    unsigned bucket = hash_to_bucket(hash);
> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
> +    struct conn *conn = NULL;
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)) {
> +            break;
> +        }
> +        if (!conn_key_cmp(&conn->rev_key, key)) {
> +            break;
> +        }
> +    }
> +    return conn;
> +}
> +
> +static struct conn *
> +conn_lookup_unnat(struct conntrack *ct, const struct conn_key *key,
> +                  uint32_t hash)
> +{
> +    unsigned bucket = hash_to_bucket(hash);
> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
> +    struct conn *conn = NULL;
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)
> +            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> +            break;
> +        }
> +    }
> +    return conn;
>  }
>
>  static void
>  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
>                    long long now, int seq_skew, bool seq_skew_dir)
>  {
> -    unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +    unsigned bucket = hash_to_bucket(hash);
>      ct_lock_lock(&ct->buckets[bucket].lock);
> -    struct conn *conn = conn_lookup(ct, key, now);
> +    struct conn_lookup_ctx ctx;
> +    ctx.key = *key;
> +    ctx.hash = hash;
> +    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> +    struct conn *conn = ctx.conn;
> +
>      if (conn && seq_skew) {
>          conn->seq_skew = seq_skew;
>          conn->seq_skew_dir = seq_skew_dir;
> @@ -777,12 +819,12 @@ nat_clean(struct conntrack *ct, struct conn *conn,
>      nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
> ct->hash_basis);
>      ct_rwlock_unlock(&ct->resources_lock);
>      ct_lock_unlock(&ctb->lock);
> +    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
>      unsigned bucket_rev_conn =
> -        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
> +        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
>      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
>      ct_rwlock_wrlock(&ct->resources_lock);
> -    long long now = time_msec();
> -    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
> +    struct conn *rev_conn = conn_lookup_unnat(ct, &conn->rev_key, hash);
>      struct nat_conn_key_node *nat_conn_key_node =
>          nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
>                               ct->hash_basis);
> @@ -812,7 +854,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>          expectation_clean(ct, &conn->key, ct->hash_basis);
>      }
>      ovs_list_remove(&conn->exp_node);
> -    hmap_remove(&ctb->connections, &conn->node);
> +
> +    /* Temporary debug check. */
> +    if (conn_key_present(ct, ctb, &conn->key)) {
> +        hmap_remove(&ctb->connections, &conn->node);
> +    } else {
> +        char *log_msg = xasprintf("conn_clean: conn not present in hmap");
> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> +        free(log_msg);
> +    }
> +
>      atomic_count_dec(&ct->n_conn);
>      if (conn->nat_info) {
>          nat_clean(ct, conn, ctb);
> @@ -1005,7 +1056,7 @@ conn_update_state(struct conntrack *ct, struct
> dp_packet *pkt,
>
>  static void
>  create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
> -                   long long now, bool alg_un_nat)
> +                   bool alg_un_nat)
>  {
>      struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
>      nc->key = conn_for_un_nat_copy->rev_key;
> @@ -1013,7 +1064,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
>      uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
>      unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
>      ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
> -    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
> +    struct conn *rev_conn = conn_lookup_any(ct, &nc->key, un_nat_hash);
>
>      if (alg_un_nat) {
>          if (!rev_conn) {
> @@ -1022,7 +1073,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
>          } else {
>              char *log_msg = xasprintf("Unusual condition for un_nat conn "
>                                        "create for alg: rev_conn %p",
> rev_conn);
> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>              free(log_msg);
>              free(nc);
>          }
> @@ -1030,16 +1081,26 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn *conn_for_un_nat_copy,
>          ct_rwlock_rdlock(&ct->resources_lock);
>
>          struct nat_conn_key_node *nat_conn_key_node =
> -            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> ct->hash_basis);
> +           nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> ct->hash_basis);
>          if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
> -            &nc->rev_key) && !rev_conn) {
> -            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> -                        &nc->node, un_nat_hash);
> +                                               &nc->rev_key)) {
> +            if (!rev_conn) {
> +                hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> +                            &nc->node, un_nat_hash);
> +            } else {
> +                char *log_msg = xasprintf("NAT conflict; un-nat will not"
> +                                          "happen; likely DOS");
> +                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> +                free(log_msg);
> +                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
> +                                     ct->hash_basis);
> +                free(nc);
> +            }
>          } else {
>              char *log_msg = xasprintf("Unusual condition for un_nat conn "
>                                        "create: nat_conn_key_node/rev_conn
> "
> -                                      "%p/%p", nat_conn_key_node,
> rev_conn);
> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> +                                      "%p", nat_conn_key_node);
> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>              free(log_msg);
>              free(nc);
>          }
> @@ -1286,7 +1347,7 @@ process_one(struct conntrack *ct, struct dp_packet
> *pkt,
>      ct_lock_unlock(&ct->buckets[bucket].lock);
>
>      if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
> -        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
> +        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
>      }
>
>      handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
> @@ -1383,19 +1444,18 @@ sweep_bucket(struct conntrack *ct, struct
> conntrack_bucket *ctb,
>
>      for (unsigned i = 0; i < N_CT_TM; i++) {
>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -                if (!conn_expired(conn, now) || count >= limit) {
> -                    min_expiration = MIN(min_expiration,
> conn->expiration);
> -                    if (count >= limit) {
> -                        /* Do not check other lists. */
> -                        COVERAGE_INC(conntrack_long_cleanup);
> -                        return min_expiration;
> -                    }
> -                    break;
> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> +            if (!conn_expired(conn, now) || count >= limit) {
> +                min_expiration = MIN(min_expiration, conn->expiration);
> +                if (count >= limit) {
> +                    /* Do not check other lists. */
> +                    COVERAGE_INC(conntrack_long_cleanup);
> +                    return min_expiration;
>                  }
> -                conn_clean(ct, conn, ctb);
> -                count++;
> +                break;
>              }
> +            conn_clean(ct, conn, ctb);
> +            count++;
>          }
>      }
>      return min_expiration;
> @@ -2540,16 +2600,18 @@ int
>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>  {
>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> -        struct conn *conn, *next;
> -
> -        ct_lock_lock(&ct->buckets[i].lock);
> -        HMAP_FOR_EACH_SAFE (conn, next, node,
> &ct->buckets[i].connections) {
> -            if ((!zone || *zone == conn->key.zone) &&
> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> -                conn_clean(ct, conn, &ct->buckets[i]);
> +        struct conntrack_bucket *ctb = &ct->buckets[i];
> +        ct_lock_lock(&ctb->lock);
> +        for (unsigned j = 0; j < N_CT_TM; j++) {
> +            struct conn *conn, *next;
> +            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j])
> {
> +                if (!zone || *zone == conn->key.zone) {
> +                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> +                    conn_clean(ct, conn, ctb);
> +                }
>              }
>          }
> -        ct_lock_unlock(&ct->buckets[i].lock);
> +        ct_lock_unlock(&ctb->lock);
>      }
>
>      return 0;
> (END)
>
>
> On Thu, Mar 7, 2019 at 5:30 PM Li Wei <[email protected]> wrote:
>
>> hi darrell:
>>
>> I set nat action with
>> actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>
>> With your patch, new double free panic happens in conntrack_flush() and
>> sweep_bucket():
>>
>> ==1st panic==
>> [Thread debugging using libthread_db enabled]
>> Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>> -vconsole:emer -vsyslog:err -vfi'.
>> Program terminated with signal SIGABRT, Aborted.
>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>> 51      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
>> [Current thread is 1 (Thread 0x7f92b122cb00 (LWP 2387347))]
>> (gdb) bt
>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>> #1  0x00007f92af62a42a in __GI_abort () at abort.c:89
>> #2  0x00007f92af666c00 in __libc_message (do_abort=do_abort@entry=2,
>> fmt=fmt@entry=0x7f92af75bd98 "*** Error in `%s': %s: 0x%s ***\n")
>>     at ../sysdeps/posix/libc_fatal.c:175
>> #3  0x00007f92af66cfc6 in malloc_printerr (action=3, str=0x7f92af75bef0
>> "double free or corruption (fasttop)", ptr=<optimized out>,
>> ar_ptr=<optimized out>)
>>     at malloc.c:5049
>> #4  0x00007f92af66d80e in _int_free (av=0x7f8bb4000020, p=0x7f8bb70ef770,
>> have_lock=0) at malloc.c:3905
>> #5  0x00005622735d3f90 in delete_conn (conn=conn@entry=0x7f8bb70ef670)
>> at lib/conntrack.c:2380
>> #6  0x00005622735d52ad in nat_clean (ctb=0x7f92b10da7f0,
>> conn=0x7f8bb70ef670, ct=0x7f92b10d5d98) at lib/conntrack.c:816
>> #7  conn_clean (ct=ct@entry=0x7f92b10d5d98, conn=0x7f8bb70ef670,
>> ctb=ctb@entry=0x7f92b10da7f0) at lib/conntrack.c:842
>> #8  0x00005622735da710 in conntrack_flush (ct=0x7f92b10d5d98, zone=0x0)
>> at lib/conntrack.c:2574
>> #9  0x00005622734bfb39 in ct_dpif_flush (dpif=0x5622758671d0,
>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
>> #10 0x00005622735de860 in dpctl_flush_conntrack (argc=argc@entry=1,
>> argv=argv@entry=0x56227589cc40, dpctl_p=dpctl_p@entry=0x7fff9087fe90) at
>> lib/dpctl.c:1388
>> #11 0x00005622735dbc38 in dpctl_unixctl_handler (conn=0x56227589bc90,
>> argc=1, argv=0x56227589cc40, aux=0x5622735de6d0 <dpctl_flush_conntrack>) at
>> lib/dpctl.c:2312
>> #12 0x000056227357d6ea in process_command (request=<optimized out>,
>> conn=0x56227589bc90) at lib/unixctl.c:308
>> #13 run_connection (conn=0x56227589bc90) at lib/unixctl.c:342
>> #14 unixctl_server_run (server=0x5622757af230) at lib/unixctl.c:393
>> #15 0x0000562273101217 in main (argc=<optimized out>, argv=<optimized
>> out>) at vswitchd/ovs-vswitchd.c:126
>>
>> The debug info in /var/log/openvswitch/ovs-vswitchd.log:
>> 70 2019-03-08T00:54:31.278Z|00227|conntrack|WARN|conn_clean: conn not
>> present in hmap: src ip 32.248.14.183 dst ip 222.15.63.163 rev src ip
>> 222.15.63.163 rev dst ip     172.112.98.138 src/dst ports 54957/80 rev
>> src/dst ports 80/54957 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>>
>> ==sec panic==
>> (gdb) bt
>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>> #1  0x00007faece4b642a in __GI_abort () at abort.c:89
>> #2  0x00007faece4f2c00 in __libc_message (do_abort=do_abort@entry=2,
>> fmt=fmt@entry=0x7faece5e7d98 "*** Error in `%s': %s: 0x%s ***\n")
>>     at ../sysdeps/posix/libc_fatal.c:175
>> #3  0x00007faece4f8fc6 in malloc_printerr (action=3, str=0x7faece5e7e60
>> "double free or corruption (out)", ptr=<optimized out>, ar_ptr=<optimized
>> out>)
>>     at malloc.c:5049
>> #4  0x00007faece4f980e in _int_free (av=0x7faece81bb00 <main_arena>,
>> p=0x7fa11c50ee30, have_lock=0) at malloc.c:3905
>> #5  0x0000562c63ac82ad in nat_clean (ctb=0x7faecff65cf8,
>> conn=0x7fa11c50ee40, ct=0x7faecff61d98) at lib/conntrack.c:816
>> #6  conn_clean (ct=ct@entry=0x7faecff61d98, conn=0x7fa11c50ee40,
>> ctb=ctb@entry=0x7faecff65cf8) at lib/conntrack.c:842
>> #7  0x0000562c63ac8639 in sweep_bucket (limit=3906, now=1287899232,
>> ctb=<optimized out>, ct=0x7faecff61d98) at lib/conntrack.c:1421
>> #8  conntrack_clean (now=1287899232, ct=0x7faecff61d98) at
>> lib/conntrack.c:1457
>> #9  clean_thread_main (f_=0x7faecff61d98) at lib/conntrack.c:1512
>> #10 0x0000562c63a3f48f in ovsthread_wrapper (aux_=<optimized out>) at
>> lib/ovs-thread.c:354
>> #11 0x00007faecef76494 in start_thread (arg=0x7faec77fe700) at
>> pthread_create.c:333
>> #12 0x00007faece56aacf in clone () at
>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>
>> 2019-03-08T01:15:16.929Z|00055|conntrack(ct_clean3)|WARN|conn_clean: conn
>> not present in hmap: src ip 2.92.142.188 dst ip 222.15.63.163 rev src ip
>> 222.15.63.163 rev dst ip 172.116.154.125 src/dst ports 23446/80 rev src/dst
>> ports 80/23446 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>>
>>
>> Darrell Ball wrote:
>> > Thanks for your help Solomon
>> >
>> > Can you give the following debug patch a spin ?
>> > I will continue to try to repo locally.
>> >
>> > diff --git a/lib/conntrack.c b/lib/conntrack.c
>> > index 5410ab4..6968c03 100644
>> > --- a/lib/conntrack.c
>> > +++ b/lib/conntrack.c
>> > @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
>> struct
>> > conn_key *key2)
>> >      return 1;
>> >  }
>> >
>> > +static bool
>> > +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
>> > +                 const struct conn_key *key)
>> > +    OVS_REQUIRES(ctb->lock)
>> > +{
>> > +    struct conn *conn;
>> > +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
>> > +
>> > +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> > +        if (!conn_key_cmp(&conn->key, key)) {
>> > +            return true;
>> > +        }
>> > +    }
>> > +    return false;
>> > +}
>> > +
>> >  static void
>> >  ct_print_conn_info(const struct conn *c, const char *log_msg,
>> >                     enum vlog_level vll, bool force, bool rl_on)
>> > @@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>> >          expectation_clean(ct, &conn->key, ct->hash_basis);
>> >      }
>> >      ovs_list_remove(&conn->exp_node);
>> > -    hmap_remove(&ctb->connections, &conn->node);
>> > +
>> > +    if (conn_key_present(ct, ctb, &conn->key)) {
>> > +        hmap_remove(&ctb->connections, &conn->node);
>> > +    } else {
>> > +        char *log_msg = xasprintf("conn_clean: conn not present in
>> hmap");
>> > +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
>> > +        free(log_msg);
>> > +    }
>> >      atomic_count_dec(&ct->n_conn);
>> >      if (conn->nat_info) {
>> >          nat_clean(ct, conn, ctb);
>> > @@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
>> > conntrack_bucket *ctb,
>> >
>> >      for (unsigned i = 0; i < N_CT_TM; i++) {
>> >          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
>> > -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>> > -                if (!conn_expired(conn, now) || count >= limit) {
>> > -                    min_expiration = MIN(min_expiration,
>> conn->expiration);
>> > -                    if (count >= limit) {
>> > -                        /* Do not check other lists. */
>> > -                        COVERAGE_INC(conntrack_long_cleanup);
>> > -                        return min_expiration;
>> > -                    }
>> > -                    break;
>> > +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>> > +            if (!conn_expired(conn, now) || count >= limit) {
>> > +                min_expiration = MIN(min_expiration, conn->expiration);
>> > +                if (count >= limit) {
>> > +                    /* Do not check other lists. */
>> > +                    COVERAGE_INC(conntrack_long_cleanup);
>> > +                    return min_expiration;
>> >                  }
>> > -                conn_clean(ct, conn, ctb);
>> > -                count++;
>> > +                break;
>> >              }
>> > +            conn_clean(ct, conn, ctb);
>> > +            count++;
>> >          }
>> >      }
>> >      return min_expiration;
>> > @@ -2540,16 +2562,18 @@ int
>> >  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>> >  {
>> >      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>> > -        struct conn *conn, *next;
>> > -
>> > -        ct_lock_lock(&ct->buckets[i].lock);
>> > -        HMAP_FOR_EACH_SAFE (conn, next, node,
>> &ct->buckets[i].connections)
>> > {
>> > -            if ((!zone || *zone == conn->key.zone) &&
>> > -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>> > -                conn_clean(ct, conn, &ct->buckets[i]);
>> > +        struct conntrack_bucket *ctb = &ct->buckets[i];
>> > +        ct_lock_lock(&ctb->lock);
>> > +        for (unsigned j = 0; j < N_CT_TM; j++) {
>> > +            struct conn *conn, *next;
>> > +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> &ctb->exp_lists[j]) {
>> > +                if (!zone || *zone == conn->key.zone) {
>> > +                    ovs_assert(conn->conn_type ==
>> CT_CONN_TYPE_DEFAULT);
>> > +                    conn_clean(ct, conn, ctb);
>> > +                }
>> >              }
>> >          }
>> > -        ct_lock_unlock(&ct->buckets[i].lock);
>> > +        ct_lock_unlock(&ctb->lock);
>> >      }
>> >
>> >      return 0;
>> > (END)
>> >
>> >
>> > On Wed, Mar 6, 2019 at 11:33 PM solomon <[email protected]>
>> wrote:
>> >
>> >> Darrell Ball wrote:
>> >>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> >> &ctb->exp_lists[j]) {
>> >>> +                if (!zone || *zone == conn->key.zone) {
>> >>> +                    ovs_assert(conn->conn_type ==
>> CT_CONN_TYPE_DEFAULT);
>> >>
>> >> why do we need this assert?
>> >> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
>> >> CT_CONN_TYPE_UN_NAT in nat_clean() like following:
>> >> +                if ((!zone || *zone == conn->key.zone) &&
>> >> +                    (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>> >> +                    //ovs_assert(conn->conn_type ==
>> CT_CONN_TYPE_DEFAULT);
>> >>
>> >>
>> >> with the above code, catch an another panic which in ct_clean thread.
>> >> I have see the same panic without changeing the code.
>> >> Both ct_clean and flush-conntrack, can catch the bad bucket value.
>> >>
>> >> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>> >> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>> >> 287         while (*bucket != node) {
>> >> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
>> >> (gdb) bt
>> >> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>> >> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>> >> #1  conn_clean (ct=ct@entry=0x7f9498700d98, conn=0x7f871bdc41b0,
>> >> ctb=ctb@entry=0x7f9498701c38) at lib/conntrack.c:815
>> >> #2  0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
>> >> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
>> >> #3  conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
>> >> lib/conntrack.c:1433
>> >> #4  clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
>> >> #5  0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
>> >> lib/ovs-thread.c:354
>> >> #6  0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
>> >> pthread_create.c:333
>> >> #7  0x00007f9496d09acf in clone () at
>> >> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> >> (gdb) p bucket
>> >> $1 = (struct hmap_node **) 0x8  <====== why is bucket not a point
>> value?
>> >> (gdb) p *(struct hmap *) 0x7f9498701c68
>> >> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
>> >> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
>> >> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
>> >> (gdb) p 203059667&32767
>> >> $4 = 29139
>> >> (gdb) p &hmap->buckets[29139]
>> >> $5 = (struct hmap_node **) 0x7f8609fc8a98
>> >>
>> >>
>> >>> +                    conn_clean(ct, conn, ctb);
>> >>> +                }
>> >>>              }
>> >>>          }
>> >>> -        ct_lock_unlock(&ct->buckets[i].lock);
>> >>> +        ct_lock_unlock(&ctb->lock);
>> >>>      }
>> >>>
>> >>>      return 0;
>> >>>
>> >>>
>> >>> which yields conntrack_flush(...) as
>> >>>
>> >>> int
>> >>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>> >>> {
>> >>>     for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>> >>>         struct conntrack_bucket *ctb = &ct->buckets[i];
>> >>>         ct_lock_lock(&ctb->lock);
>> >>>         for (unsigned j = 0; j < N_CT_TM; j++) {
>> >>>             struct conn *conn, *next;
>> >>>             LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> >> &ctb->exp_lists[j]) {
>> >>>                 if (!zone || *zone == conn->key.zone) {
>> >>>                     ovs_assert(conn->conn_type ==
>> CT_CONN_TYPE_DEFAULT);
>> >>>                     conn_clean(ct, conn, ctb);
>> >>>                 }
>> >>>             }
>> >>>         }
>> >>>         ct_lock_unlock(&ctb->lock);
>> >>>     }
>> >>>
>> >>>     return 0;
>> >>> }
>> >>>
>> >>> Thanks Darrell
>> >>>
>> >>>
>> >>>
>> >>> On Wed, Mar 6, 2019 at 8:06 PM solomon <[email protected]>
>> wrote:
>> >>>
>> >>>>
>> >>>>    When i test conntrack, i catch a panic of ovs.
>> >>>>
>> >>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>> >>>> -vconsole:emer -vsyslog:err -vfi'.
>> >>>> Program terminated with signal SIGSEGV, Segmentation fault.
>> >>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>> >>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>> >>>> 287         while (*bucket != node) {
>> >>>> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
>> >>>> (gdb) bt
>> >>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>> >>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>> >>>> #1  conn_clean (ct=ct@entry=0x7f8178c75d98, conn=0x7f734cde0170,
>> >>>> ctb=ctb@entry=0x7f8178c7fd40) at lib/conntrack.c:815
>> >>>> #2  0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98,
>> zone=0x0)
>> >> at
>> >>>> lib/conntrack.c:2549
>> >>>> #3  0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
>> >>>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
>> >>>> #4  0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc@entry=1,
>> >>>> argv=argv@entry=0x5605c697ec30, dpctl_p=dpctl_p@entry
>> =0x7fffee718110)
>> >> at
>> >>>> lib/dpctl.c:1388
>> >>>> #5  0x00005605c5cdeb78 in dpctl_unixctl_handler (conn=0x5605c6959ca0,
>> >>>> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
>> >> <dpctl_flush_conntrack>) at
>> >>>> lib/dpctl.c:2312
>> >>>> #6  0x00005605c5c806ea in process_command (request=<optimized out>,
>> >>>> conn=0x5605c6959ca0) at lib/unixctl.c:308
>> >>>> #7  run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
>> >>>> #8  unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
>> >>>> #9  0x00005605c5804217 in main (argc=<optimized out>, argv=<optimized
>> >>>> out>) at vswitchd/ovs-vswitchd.c:126
>> >>>>
>> >>>>
>> >>>> Environment:
>> >>>> ovs-2.10.1
>> >>>> dpdk-18.0.2.2
>> >>>>
>> >>>> How-To-Repeat:
>> >>>> 1. configure ovs with snat aciton.
>> >>>>
>> >>>> ovs-ofctl  -O OpenFlow15 add-group $br_name "group_id=1, type=select,
>> >>>> selection_method=hash
>> >>>>
>> >>
>> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>> >>>> "
>> >>>>
>> >>>> 2. syn-ddos send tcp syn packet to generate connection tracks.
>> >>>> 3.
>> >>>> #   ovs-appctl dpctl/ct-get-nconns
>> >>>> 2063993
>> >>>> #   ovs-appctl dpctl/flush-conntrack
>> >>>>
>> >>>> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
>> >>>> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
>> >>>> ovs-appctl: ovs-vswitchd: transaction error (End of file)
>> >>>>
>> >>>>
>> >>>> --
>> >>>> Thanks
>> >>>> Solomon
>> >>>>
>> >>>
>> >>
>> >> --
>> >>
>> >> Thanks
>> >> Solomon
>> >>
>> >
>>
>> --
>>
>> Thanks
>> Li Wei
>>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to