Thanks Solomon

There is an issue with the fix; pls try the following patch in my repo.
It is not cleaned up and also has some extra temp code, but works on my
side.

I also noticed that not all of the changes I had previously done were
merged, like

-    unsigned bucket_rev_conn =
-        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
+    unsigned bucket_rev_conn = hash_to_bucket(hash);

Hence, I think it will be easier just to point to one of my public git
repos.

https://github.com/darball/ovs_master/commits/branch-2.10

Darrell




On Sun, Mar 10, 2019 at 8:49 PM Li Wei <[email protected]> wrote:

> hi  Darrell:
>
>   With your new patch(see attached file), still panic both in
> conntrack_flush() and sweep_bucket().
>
> (gdb) bt
> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
> #1  0x00007f2d9af7942a in __GI_abort () at abort.c:89
> #2  0x00007f2d9afb5c00 in __libc_message (do_abort=do_abort@entry=2,
> fmt=fmt@entry=0x7f2d9b0aad98 "*** Error in `%s': %s: 0x%s ***\n")
>     at ../sysdeps/posix/libc_fatal.c:175
> #3  0x00007f2d9afbbfc6 in malloc_printerr (action=3, str=0x7f2d9b0aaef0
> "double free or corruption (fasttop)", ptr=<optimized out>,
> ar_ptr=<optimized out>)
>     at malloc.c:5049
> #4  0x00007f2d9afbc80e in _int_free (av=0x7f2698000020, p=0x7f1fcbb78440,
> have_lock=0) at malloc.c:3905
> #5  0x000055a2c3e0df90 in delete_conn (conn=conn@entry=0x7f1fcbb78340) at
> lib/conntrack.c:2419
> #6  0x000055a2c3e0f31c in nat_clean (ctb=0x7f2d9ca2cd90,
> conn=0x7f1fcbb78340, ct=0x7f2d9ca24d98) at lib/conntrack.c:843
> #7  conn_clean (ct=ct@entry=0x7f2d9ca24d98, conn=0x7f1fcbb78340,
> ctb=ctb@entry=0x7f2d9ca2cd90) at lib/conntrack.c:869
> #8  0x000055a2c3e14820 in conntrack_flush (ct=0x7f2d9ca24d98, zone=0x0) at
> lib/conntrack.c:2613
> #9  0x000055a2c3cf9b39 in ct_dpif_flush (dpif=0x55a2c4e7b6f0,
> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
> #10 0x000055a2c3e18970 in dpctl_flush_conntrack (argc=argc@entry=1,
> argv=argv@entry=0x55a2c4dd2550, dpctl_p=dpctl_p@entry=0x7ffd9fda7600) at
> lib/dpctl.c:1388
> #11 0x000055a2c3e15d48 in dpctl_unixctl_handler (conn=0x55a2c4d9cf50,
> argc=1, argv=0x55a2c4dd2550, aux=0x55a2c3e187e0 <dpctl_flush_conntrack>) at
> lib/dpctl.c:2312
> #12 0x000055a2c3db76ea in process_command (request=<optimized out>,
> conn=0x55a2c4d9cf50) at lib/unixctl.c:308
> #13 run_connection (conn=0x55a2c4d9cf50) at lib/unixctl.c:342
> #14 unixctl_server_run (server=0x55a2c4d94230) at lib/unixctl.c:393
> #15 0x000055a2c393b217 in main (argc=<optimized out>, argv=<optimized
> out>) at vswitchd/ovs-vswitchd.c:126
>
> # cat /var/log/openvswitch/ovs-vswitchd.log|grep conntrack
> 2019-03-11T03:36:31.293Z|00001|conntrack(pmd61)|WARN|Unusual condition for
> un_nat conn create: nat_conn_key_node/rev_conn (nil)/(nil): src ip
> 222.15.63.163 dst ip 172.89.78.140 rev src ip 2.128.173.3 rev dst ip
> 222.15.63.163 src/dst ports 80/10623 rev src/dst ports 10623/80 zone/rev
> zone 0/0 nw_proto/rev nw_proto 6/6
> 2019-03-11T03:36:33.599Z|00001|conntrack(pmd82)|WARN|Unusual condition for
> un_nat conn create: nat_conn_key_node/rev_conn (nil)/(nil): src ip
> 222.15.63.163 dst ip 172.31.235.224 rev src ip 2.143.150.201 rev dst ip
> 222.15.63.163 src/dst ports 80/17937 rev src/dst ports 17937/80 zone/rev
> zone 0/0 nw_proto/rev nw_proto 6/6
> 2019-03-11T03:36:33.907Z|00054|coverage(ct_clean3)|INFO|conntrack_full
>        13374.0/sec 61885.550/sec     1031.4258/sec   total: 3718506
> 2019-03-11T03:36:33.907Z|00055|coverage(ct_clean3)|INFO|conntrack_long_cleanup
>    0.0/sec     0.000/sec        0.0000/sec   total: 188
> 2019-03-11T03:36:33.907Z|00059|poll_loop(ct_clean3)|INFO|wakeup due to
> 0-ms timeout at lib/conntrack.c:1554 (64% CPU usage)
> 2019-03-11T03:36:34.004Z|00221|conntrack|WARN|conn_clean: conn not present
> in hmap: src ip 240.132.183.203 dst ip 222.15.63.163 rev src ip
> 222.15.63.163 rev dst ip 172.68.216.36 src/dst ports 31344/80 rev src/dst
> ports 80/31344 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>
>
> Darrell Ball wrote:
> > Hi Solomon
> >
> > There was a typo in nat_clean(...)
> >
> >      ct_rwlock_unlock(&ct->resources_lock);
> >      ct_lock_unlock(&ctb->lock);
> >      uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> > -    unsigned bucket_rev_conn =
> > -        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
> > +    unsigned bucket_rev_conn = hash_to_bucket(hash);
> >      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
> >      ct_rwlock_wrlock(&ct->resources_lock);
> >
> > I also cleaned up the patch a little more and ended up with:
> >
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 5410ab4..68d9816 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
> struct
> > conn_key *key2)
> >      return 1;
> >  }
> >
> > +static bool
> > +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> > +                 const struct conn_key *key)
> > +    OVS_REQUIRES(ctb->lock)
> > +{
> > +    struct conn *conn;
> > +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> > +
> > +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> > +        if (!conn_key_cmp(&conn->key, key)) {
> > +            return true;
> > +        }
> > +    }
> > +    return false;
> > +}
> > +
> >  static void
> >  ct_print_conn_info(const struct conn *c, const char *log_msg,
> >                     enum vlog_level vll, bool force, bool rl_on)
> > @@ -738,29 +754,53 @@ un_nat_packet(struct dp_packet *pkt, const struct
> > conn *conn,
> >      }
> >  }
> >
> > -/* Typical usage of this helper is in non per-packet code;
> > - * this is because the bucket lock needs to be held for lookup
> > - * and a hash would have already been needed. Hence, this function
> > - * is just intended for code clarity. */
> > +/* This function is called with the bucket lock held. */
> >  static struct conn *
> > -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
> > now)
> > +conn_lookup_any(const struct conn_key *key,
> > +                const struct conntrack_bucket *ctb, uint32_t hash)
> >  {
> > -    struct conn_lookup_ctx ctx;
> > -    ctx.conn = NULL;
> > -    ctx.key = *key;
> > -    ctx.hash = conn_key_hash(key, ct->hash_basis);
> > -    unsigned bucket = hash_to_bucket(ctx.hash);
> > -    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> > -    return ctx.conn;
> > +    struct conn *conn = NULL;
> > +
> > +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> > +        if (!conn_key_cmp(&conn->key, key)) {
> > +            break;
> > +        }
> > +        if (!conn_key_cmp(&conn->rev_key, key)) {
> > +            break;
> > +        }
> > +    }
> > +    return conn;
> > +}
> > +
> > +/* This function is called with the bucket lock held. */
> > +static struct conn *
> > +conn_lookup_unnat(const struct conn_key *key,
> > +                  const struct conntrack_bucket *ctb, uint32_t hash)
> > +{
> > +    struct conn *conn = NULL;
> > +
> > +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> > +        if (!conn_key_cmp(&conn->key, key)
> > +            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> > +            break;
> > +        }
> > +    }
> > +    return conn;
> >  }
> >
> >  static void
> >  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
> >                    long long now, int seq_skew, bool seq_skew_dir)
> >  {
> > -    unsigned bucket = hash_to_bucket(conn_key_hash(key,
> ct->hash_basis));
> > +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> > +    unsigned bucket = hash_to_bucket(hash);
> >      ct_lock_lock(&ct->buckets[bucket].lock);
> > -    struct conn *conn = conn_lookup(ct, key, now);
> > +    struct conn_lookup_ctx ctx;
> > +    ctx.key = *key;
> > +    ctx.hash = hash;
> > +    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> > +    struct conn *conn = ctx.conn;
> > +
> >      if (conn && seq_skew) {
> >          conn->seq_skew = seq_skew;
> >          conn->seq_skew_dir = seq_skew_dir;
> > @@ -777,12 +817,13 @@ nat_clean(struct conntrack *ct, struct conn *conn,
> >      nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
> > ct->hash_basis);
> >      ct_rwlock_unlock(&ct->resources_lock);
> >      ct_lock_unlock(&ctb->lock);
> > -    unsigned bucket_rev_conn =
> > -        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
> > +    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> > +    unsigned bucket_rev_conn = hash_to_bucket(hash);
> >      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
> >      ct_rwlock_wrlock(&ct->resources_lock);
> > -    long long now = time_msec();
> > -    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
> > +    struct conn *rev_conn = conn_lookup_unnat(&conn->rev_key,
> > +
> > &ct->buckets[bucket_rev_conn],
> > +                                              hash);
> >      struct nat_conn_key_node *nat_conn_key_node =
> >          nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
> >                               ct->hash_basis);
> > @@ -812,7 +853,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
> >          expectation_clean(ct, &conn->key, ct->hash_basis);
> >      }
> >      ovs_list_remove(&conn->exp_node);
> > -    hmap_remove(&ctb->connections, &conn->node);
> > +
> > +    /* Temporary debug check. */
> > +    if (conn_key_present(ct, ctb, &conn->key)) {
> > +        hmap_remove(&ctb->connections, &conn->node);
> > +    } else {
> > +        char *log_msg = xasprintf("conn_clean: conn not present in
> hmap");
> > +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> > +        free(log_msg);
> > +    }
> > +
> >      atomic_count_dec(&ct->n_conn);
> >      if (conn->nat_info) {
> >          nat_clean(ct, conn, ctb);
> > @@ -1005,7 +1055,7 @@ conn_update_state(struct conntrack *ct, struct
> > dp_packet *pkt,
> >
> >  static void
> >  create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
> > -                   long long now, bool alg_un_nat)
> > +                   bool alg_un_nat)
> >  {
> >      struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
> >      nc->key = conn_for_un_nat_copy->rev_key;
> > @@ -1013,7 +1063,9 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn
> > *conn_for_un_nat_copy,
> >      uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
> >      unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
> >      ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
> > -    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
> > +    struct conn *rev_conn = conn_lookup_any(&nc->key,
> > +
> > &ct->buckets[un_nat_conn_bucket],
> > +                                            un_nat_hash);
> >
> >      if (alg_un_nat) {
> >          if (!rev_conn) {
> > @@ -1022,7 +1074,7 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn
> > *conn_for_un_nat_copy,
> >          } else {
> >              char *log_msg = xasprintf("Unusual condition for un_nat
> conn "
> >                                        "create for alg: rev_conn %p",
> > rev_conn);
> > -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> > +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> >              free(log_msg);
> >              free(nc);
> >          }
> > @@ -1030,16 +1082,26 @@ create_un_nat_conn(struct conntrack *ct, struct
> > conn *conn_for_un_nat_copy,
> >          ct_rwlock_rdlock(&ct->resources_lock);
> >
> >          struct nat_conn_key_node *nat_conn_key_node =
> > -            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> > ct->hash_basis);
> > +           nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> > ct->hash_basis);
> >          if (nat_conn_key_node &&
> !conn_key_cmp(&nat_conn_key_node->value,
> > -            &nc->rev_key) && !rev_conn) {
> > -            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> > -                        &nc->node, un_nat_hash);
> > +                                               &nc->rev_key)) {
> > +            if (!rev_conn) {
> > +
> hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> > +                            &nc->node, un_nat_hash);
> > +            } else {
> > +                char *log_msg = xasprintf("NAT conflict; un-nat will
> not"
> > +                                          "happen; likely DOS");
> > +                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> > +                free(log_msg);
> > +                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
> > +                                     ct->hash_basis);
> > +                free(nc);
> > +            }
> >          } else {
> >              char *log_msg = xasprintf("Unusual condition for un_nat
> conn "
> >                                        "create:
> nat_conn_key_node/rev_conn "
> > -                                      "%p/%p", nat_conn_key_node,
> > rev_conn);
> > -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> > +                                      "%p", nat_conn_key_node);
> > +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> >              free(log_msg);
> >              free(nc);
> >          }
> > @@ -1286,7 +1348,7 @@ process_one(struct conntrack *ct, struct dp_packet
> > *pkt,
> >      ct_lock_unlock(&ct->buckets[bucket].lock);
> >
> >      if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
> > -        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
> > +        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
> >      }
> >
> >      handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now,
> !!nat_action_info,
> > @@ -1383,19 +1445,18 @@ sweep_bucket(struct conntrack *ct, struct
> > conntrack_bucket *ctb,
> >
> >      for (unsigned i = 0; i < N_CT_TM; i++) {
> >          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> > -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> > -                if (!conn_expired(conn, now) || count >= limit) {
> > -                    min_expiration = MIN(min_expiration,
> conn->expiration);
> > -                    if (count >= limit) {
> > -                        /* Do not check other lists. */
> > -                        COVERAGE_INC(conntrack_long_cleanup);
> > -                        return min_expiration;
> > -                    }
> > -                    break;
> > +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> > +            if (!conn_expired(conn, now) || count >= limit) {
> > +                min_expiration = MIN(min_expiration, conn->expiration);
> > +                if (count >= limit) {
> > +                    /* Do not check other lists. */
> > +                    COVERAGE_INC(conntrack_long_cleanup);
> > +                    return min_expiration;
> >                  }
> > -                conn_clean(ct, conn, ctb);
> > -                count++;
> > +                break;
> >              }
> > +            conn_clean(ct, conn, ctb);
> > +            count++;
> >          }
> >      }
> >      return min_expiration;
> > @@ -2540,16 +2601,18 @@ int
> >  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> >  {
> >      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> > -        struct conn *conn, *next;
> > -
> > -        ct_lock_lock(&ct->buckets[i].lock);
> > -        HMAP_FOR_EACH_SAFE (conn, next, node,
> &ct->buckets[i].connections)
> > {
> > -            if ((!zone || *zone == conn->key.zone) &&
> > -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> > -                conn_clean(ct, conn, &ct->buckets[i]);
> > +        struct conntrack_bucket *ctb = &ct->buckets[i];
> > +        ct_lock_lock(&ctb->lock);
> > +        for (unsigned j = 0; j < N_CT_TM; j++) {
> > +            struct conn *conn, *next;
> > +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
> &ctb->exp_lists[j]) {
> > +                if (!zone || *zone == conn->key.zone) {
> > +                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> > +                    conn_clean(ct, conn, ctb);
> > +                }
> >              }
> >          }
> > -        ct_lock_unlock(&ct->buckets[i].lock);
> > +        ct_lock_unlock(&ctb->lock);
> >      }
> >
> >      return 0;
> > (END)
> >
> >
> >
> >
> > On Fri, Mar 8, 2019 at 8:07 AM Darrell Ball <[email protected]> wrote:
> >
> >> Thanks for confirming Solomon; can you check the following patch on your
> >> side as well ?
> >>
> >> Darrell
> >>
> >> diff --git a/lib/conntrack.c b/lib/conntrack.c
> >> index 5410ab4..c6b06d6 100644
> >> --- a/lib/conntrack.c
> >> +++ b/lib/conntrack.c
> >> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
> >> struct conn_key *key2)
> >>      return 1;
> >>  }
> >>
> >> +static bool
> >> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> >> +                 const struct conn_key *key)
> >> +    OVS_REQUIRES(ctb->lock)
> >> +{
> >> +    struct conn *conn;
> >> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> >> +
> >> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> >> +        if (!conn_key_cmp(&conn->key, key)) {
> >> +            return true;
> >> +        }
> >> +    }
> >> +    return false;
> >> +}
> >> +
> >>  static void
> >>  ct_print_conn_info(const struct conn *c, const char *log_msg,
> >>                     enum vlog_level vll, bool force, bool rl_on)
> >> @@ -738,29 +754,55 @@ un_nat_packet(struct dp_packet *pkt, const struct
> >> conn *conn,
> >>      }
> >>  }
> >>
> >> -/* Typical usage of this helper is in non per-packet code;
> >> - * this is because the bucket lock needs to be held for lookup
> >> - * and a hash would have already been needed. Hence, this function
> >> - * is just intended for code clarity. */
> >>  static struct conn *
> >> -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
> >> now)
> >> +conn_lookup_any(struct conntrack *ct, const struct conn_key *key,
> >> +                uint32_t hash)
> >>  {
> >> -    struct conn_lookup_ctx ctx;
> >> -    ctx.conn = NULL;
> >> -    ctx.key = *key;
> >> -    ctx.hash = conn_key_hash(key, ct->hash_basis);
> >> -    unsigned bucket = hash_to_bucket(ctx.hash);
> >> -    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> >> -    return ctx.conn;
> >> +    unsigned bucket = hash_to_bucket(hash);
> >> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
> >> +    struct conn *conn = NULL;
> >> +
> >> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> >> +        if (!conn_key_cmp(&conn->key, key)) {
> >> +            break;
> >> +        }
> >> +        if (!conn_key_cmp(&conn->rev_key, key)) {
> >> +            break;
> >> +        }
> >> +    }
> >> +    return conn;
> >> +}
> >> +
> >> +static struct conn *
> >> +conn_lookup_unnat(struct conntrack *ct, const struct conn_key *key,
> >> +                  uint32_t hash)
> >> +{
> >> +    unsigned bucket = hash_to_bucket(hash);
> >> +    struct conntrack_bucket *ctb = &ct->buckets[bucket];
> >> +    struct conn *conn = NULL;
> >> +
> >> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> >> +        if (!conn_key_cmp(&conn->key, key)
> >> +            && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> >> +            break;
> >> +        }
> >> +    }
> >> +    return conn;
> >>  }
> >>
> >>  static void
> >>  conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
> >>                    long long now, int seq_skew, bool seq_skew_dir)
> >>  {
> >> -    unsigned bucket = hash_to_bucket(conn_key_hash(key,
> ct->hash_basis));
> >> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> >> +    unsigned bucket = hash_to_bucket(hash);
> >>      ct_lock_lock(&ct->buckets[bucket].lock);
> >> -    struct conn *conn = conn_lookup(ct, key, now);
> >> +    struct conn_lookup_ctx ctx;
> >> +    ctx.key = *key;
> >> +    ctx.hash = hash;
> >> +    conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> >> +    struct conn *conn = ctx.conn;
> >> +
> >>      if (conn && seq_skew) {
> >>          conn->seq_skew = seq_skew;
> >>          conn->seq_skew_dir = seq_skew_dir;
> >> @@ -777,12 +819,12 @@ nat_clean(struct conntrack *ct, struct conn *conn,
> >>      nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
> >> ct->hash_basis);
> >>      ct_rwlock_unlock(&ct->resources_lock);
> >>      ct_lock_unlock(&ctb->lock);
> >> +    uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> >>      unsigned bucket_rev_conn =
> >> -        hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
> >> +        hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
> >>      ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
> >>      ct_rwlock_wrlock(&ct->resources_lock);
> >> -    long long now = time_msec();
> >> -    struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
> >> +    struct conn *rev_conn = conn_lookup_unnat(ct, &conn->rev_key,
> hash);
> >>      struct nat_conn_key_node *nat_conn_key_node =
> >>          nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
> >>                               ct->hash_basis);
> >> @@ -812,7 +854,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
> >>          expectation_clean(ct, &conn->key, ct->hash_basis);
> >>      }
> >>      ovs_list_remove(&conn->exp_node);
> >> -    hmap_remove(&ctb->connections, &conn->node);
> >> +
> >> +    /* Temporary debug check. */
> >> +    if (conn_key_present(ct, ctb, &conn->key)) {
> >> +        hmap_remove(&ctb->connections, &conn->node);
> >> +    } else {
> >> +        char *log_msg = xasprintf("conn_clean: conn not present in
> hmap");
> >> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> >> +        free(log_msg);
> >> +    }
> >> +
> >>      atomic_count_dec(&ct->n_conn);
> >>      if (conn->nat_info) {
> >>          nat_clean(ct, conn, ctb);
> >> @@ -1005,7 +1056,7 @@ conn_update_state(struct conntrack *ct, struct
> >> dp_packet *pkt,
> >>
> >>  static void
> >>  create_un_nat_conn(struct conntrack *ct, struct conn
> >> *conn_for_un_nat_copy,
> >> -                   long long now, bool alg_un_nat)
> >> +                   bool alg_un_nat)
> >>  {
> >>      struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
> >>      nc->key = conn_for_un_nat_copy->rev_key;
> >> @@ -1013,7 +1064,7 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn
> >> *conn_for_un_nat_copy,
> >>      uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
> >>      unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
> >>      ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
> >> -    struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
> >> +    struct conn *rev_conn = conn_lookup_any(ct, &nc->key, un_nat_hash);
> >>
> >>      if (alg_un_nat) {
> >>          if (!rev_conn) {
> >> @@ -1022,7 +1073,7 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn
> >> *conn_for_un_nat_copy,
> >>          } else {
> >>              char *log_msg = xasprintf("Unusual condition for un_nat
> conn "
> >>                                        "create for alg: rev_conn %p",
> >> rev_conn);
> >> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> >> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> >>              free(log_msg);
> >>              free(nc);
> >>          }
> >> @@ -1030,16 +1081,26 @@ create_un_nat_conn(struct conntrack *ct, struct
> >> conn *conn_for_un_nat_copy,
> >>          ct_rwlock_rdlock(&ct->resources_lock);
> >>
> >>          struct nat_conn_key_node *nat_conn_key_node =
> >> -            nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> >> ct->hash_basis);
> >> +           nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> >> ct->hash_basis);
> >>          if (nat_conn_key_node &&
> !conn_key_cmp(&nat_conn_key_node->value,
> >> -            &nc->rev_key) && !rev_conn) {
> >> -            hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> >> -                        &nc->node, un_nat_hash);
> >> +                                               &nc->rev_key)) {
> >> +            if (!rev_conn) {
> >> +
> hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> >> +                            &nc->node, un_nat_hash);
> >> +            } else {
> >> +                char *log_msg = xasprintf("NAT conflict; un-nat will
> not"
> >> +                                          "happen; likely DOS");
> >> +                ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> >> +                free(log_msg);
> >> +                nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
> >> +                                     ct->hash_basis);
> >> +                free(nc);
> >> +            }
> >>          } else {
> >>              char *log_msg = xasprintf("Unusual condition for un_nat
> conn "
> >>                                        "create:
> nat_conn_key_node/rev_conn
> >> "
> >> -                                      "%p/%p", nat_conn_key_node,
> >> rev_conn);
> >> -            ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> >> +                                      "%p", nat_conn_key_node);
> >> +            ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> >>              free(log_msg);
> >>              free(nc);
> >>          }
> >> @@ -1286,7 +1347,7 @@ process_one(struct conntrack *ct, struct dp_packet
> >> *pkt,
> >>      ct_lock_unlock(&ct->buckets[bucket].lock);
> >>
> >>      if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
> >> -        create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
> >> +        create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
> >>      }
> >>
> >>      handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now,
> !!nat_action_info,
> >> @@ -1383,19 +1444,18 @@ sweep_bucket(struct conntrack *ct, struct
> >> conntrack_bucket *ctb,
> >>
> >>      for (unsigned i = 0; i < N_CT_TM; i++) {
> >>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> >> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> >> -                if (!conn_expired(conn, now) || count >= limit) {
> >> -                    min_expiration = MIN(min_expiration,
> >> conn->expiration);
> >> -                    if (count >= limit) {
> >> -                        /* Do not check other lists. */
> >> -                        COVERAGE_INC(conntrack_long_cleanup);
> >> -                        return min_expiration;
> >> -                    }
> >> -                    break;
> >> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> >> +            if (!conn_expired(conn, now) || count >= limit) {
> >> +                min_expiration = MIN(min_expiration, conn->expiration);
> >> +                if (count >= limit) {
> >> +                    /* Do not check other lists. */
> >> +                    COVERAGE_INC(conntrack_long_cleanup);
> >> +                    return min_expiration;
> >>                  }
> >> -                conn_clean(ct, conn, ctb);
> >> -                count++;
> >> +                break;
> >>              }
> >> +            conn_clean(ct, conn, ctb);
> >> +            count++;
> >>          }
> >>      }
> >>      return min_expiration;
> >> @@ -2540,16 +2600,18 @@ int
> >>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> >>  {
> >>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> >> -        struct conn *conn, *next;
> >> -
> >> -        ct_lock_lock(&ct->buckets[i].lock);
> >> -        HMAP_FOR_EACH_SAFE (conn, next, node,
> >> &ct->buckets[i].connections) {
> >> -            if ((!zone || *zone == conn->key.zone) &&
> >> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> >> -                conn_clean(ct, conn, &ct->buckets[i]);
> >> +        struct conntrack_bucket *ctb = &ct->buckets[i];
> >> +        ct_lock_lock(&ctb->lock);
> >> +        for (unsigned j = 0; j < N_CT_TM; j++) {
> >> +            struct conn *conn, *next;
> >> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
> &ctb->exp_lists[j])
> >> {
> >> +                if (!zone || *zone == conn->key.zone) {
> >> +                    ovs_assert(conn->conn_type ==
> CT_CONN_TYPE_DEFAULT);
> >> +                    conn_clean(ct, conn, ctb);
> >> +                }
> >>              }
> >>          }
> >> -        ct_lock_unlock(&ct->buckets[i].lock);
> >> +        ct_lock_unlock(&ctb->lock);
> >>      }
> >>
> >>      return 0;
> >> (END)
> >>
> >>
> >> On Thu, Mar 7, 2019 at 5:30 PM Li Wei <[email protected]> wrote:
> >>
> >>> hi darrell:
> >>>
> >>> I set nat action with
> >>> actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
> >>>
> >>> With your patch, new double free panic happens in conntrack_flush() and
> >>> sweep_bucket():
> >>>
> >>> ==1st panic==
> >>> [Thread debugging using libthread_db enabled]
> >>> Using host libthread_db library
> "/lib/x86_64-linux-gnu/libthread_db.so.1".
> >>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
> >>> -vconsole:emer -vsyslog:err -vfi'.
> >>> Program terminated with signal SIGABRT, Aborted.
> >>> #0  __GI_raise (sig=sig@entry=6) at
> ../sysdeps/unix/sysv/linux/raise.c:51
> >>> 51      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
> >>> [Current thread is 1 (Thread 0x7f92b122cb00 (LWP 2387347))]
> >>> (gdb) bt
> >>> #0  __GI_raise (sig=sig@entry=6) at
> ../sysdeps/unix/sysv/linux/raise.c:51
> >>> #1  0x00007f92af62a42a in __GI_abort () at abort.c:89
> >>> #2  0x00007f92af666c00 in __libc_message (do_abort=do_abort@entry=2,
> >>> fmt=fmt@entry=0x7f92af75bd98 "*** Error in `%s': %s: 0x%s ***\n")
> >>>     at ../sysdeps/posix/libc_fatal.c:175
> >>> #3  0x00007f92af66cfc6 in malloc_printerr (action=3, str=0x7f92af75bef0
> >>> "double free or corruption (fasttop)", ptr=<optimized out>,
> >>> ar_ptr=<optimized out>)
> >>>     at malloc.c:5049
> >>> #4  0x00007f92af66d80e in _int_free (av=0x7f8bb4000020,
> p=0x7f8bb70ef770,
> >>> have_lock=0) at malloc.c:3905
> >>> #5  0x00005622735d3f90 in delete_conn (conn=conn@entry=0x7f8bb70ef670)
> >>> at lib/conntrack.c:2380
> >>> #6  0x00005622735d52ad in nat_clean (ctb=0x7f92b10da7f0,
> >>> conn=0x7f8bb70ef670, ct=0x7f92b10d5d98) at lib/conntrack.c:816
> >>> #7  conn_clean (ct=ct@entry=0x7f92b10d5d98, conn=0x7f8bb70ef670,
> >>> ctb=ctb@entry=0x7f92b10da7f0) at lib/conntrack.c:842
> >>> #8  0x00005622735da710 in conntrack_flush (ct=0x7f92b10d5d98, zone=0x0)
> >>> at lib/conntrack.c:2574
> >>> #9  0x00005622734bfb39 in ct_dpif_flush (dpif=0x5622758671d0,
> >>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
> >>> #10 0x00005622735de860 in dpctl_flush_conntrack (argc=argc@entry=1,
> >>> argv=argv@entry=0x56227589cc40, dpctl_p=dpctl_p@entry=0x7fff9087fe90)
> at
> >>> lib/dpctl.c:1388
> >>> #11 0x00005622735dbc38 in dpctl_unixctl_handler (conn=0x56227589bc90,
> >>> argc=1, argv=0x56227589cc40, aux=0x5622735de6d0
> <dpctl_flush_conntrack>) at
> >>> lib/dpctl.c:2312
> >>> #12 0x000056227357d6ea in process_command (request=<optimized out>,
> >>> conn=0x56227589bc90) at lib/unixctl.c:308
> >>> #13 run_connection (conn=0x56227589bc90) at lib/unixctl.c:342
> >>> #14 unixctl_server_run (server=0x5622757af230) at lib/unixctl.c:393
> >>> #15 0x0000562273101217 in main (argc=<optimized out>, argv=<optimized
> >>> out>) at vswitchd/ovs-vswitchd.c:126
> >>>
> >>> The debug info in /var/log/openvswitch/ovs-vswitchd.log:
> >>> 70 2019-03-08T00:54:31.278Z|00227|conntrack|WARN|conn_clean: conn not
> >>> present in hmap: src ip 32.248.14.183 dst ip 222.15.63.163 rev src ip
> >>> 222.15.63.163 rev dst ip     172.112.98.138 src/dst ports 54957/80 rev
> >>> src/dst ports 80/54957 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
> >>>
> >>> ==sec panic==
> >>> (gdb) bt
> >>> #0  __GI_raise (sig=sig@entry=6) at
> ../sysdeps/unix/sysv/linux/raise.c:51
> >>> #1  0x00007faece4b642a in __GI_abort () at abort.c:89
> >>> #2  0x00007faece4f2c00 in __libc_message (do_abort=do_abort@entry=2,
> >>> fmt=fmt@entry=0x7faece5e7d98 "*** Error in `%s': %s: 0x%s ***\n")
> >>>     at ../sysdeps/posix/libc_fatal.c:175
> >>> #3  0x00007faece4f8fc6 in malloc_printerr (action=3, str=0x7faece5e7e60
> >>> "double free or corruption (out)", ptr=<optimized out>,
> ar_ptr=<optimized
> >>> out>)
> >>>     at malloc.c:5049
> >>> #4  0x00007faece4f980e in _int_free (av=0x7faece81bb00 <main_arena>,
> >>> p=0x7fa11c50ee30, have_lock=0) at malloc.c:3905
> >>> #5  0x0000562c63ac82ad in nat_clean (ctb=0x7faecff65cf8,
> >>> conn=0x7fa11c50ee40, ct=0x7faecff61d98) at lib/conntrack.c:816
> >>> #6  conn_clean (ct=ct@entry=0x7faecff61d98, conn=0x7fa11c50ee40,
> >>> ctb=ctb@entry=0x7faecff65cf8) at lib/conntrack.c:842
> >>> #7  0x0000562c63ac8639 in sweep_bucket (limit=3906, now=1287899232,
> >>> ctb=<optimized out>, ct=0x7faecff61d98) at lib/conntrack.c:1421
> >>> #8  conntrack_clean (now=1287899232, ct=0x7faecff61d98) at
> >>> lib/conntrack.c:1457
> >>> #9  clean_thread_main (f_=0x7faecff61d98) at lib/conntrack.c:1512
> >>> #10 0x0000562c63a3f48f in ovsthread_wrapper (aux_=<optimized out>) at
> >>> lib/ovs-thread.c:354
> >>> #11 0x00007faecef76494 in start_thread (arg=0x7faec77fe700) at
> >>> pthread_create.c:333
> >>> #12 0x00007faece56aacf in clone () at
> >>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> >>>
> >>> 2019-03-08T01:15:16.929Z|00055|conntrack(ct_clean3)|WARN|conn_clean:
> conn
> >>> not present in hmap: src ip 2.92.142.188 dst ip 222.15.63.163 rev src
> ip
> >>> 222.15.63.163 rev dst ip 172.116.154.125 src/dst ports 23446/80 rev
> src/dst
> >>> ports 80/23446 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
> >>>
> >>>
> >>> Darrell Ball wrote:
> >>>> Thanks for your help Solomon
> >>>>
> >>>> Can you give the following debug patch a spin ?
> >>>> I will continue to try to repo locally.
> >>>>
> >>>> diff --git a/lib/conntrack.c b/lib/conntrack.c
> >>>> index 5410ab4..6968c03 100644
> >>>> --- a/lib/conntrack.c
> >>>> +++ b/lib/conntrack.c
> >>>> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
> >>> struct
> >>>> conn_key *key2)
> >>>>      return 1;
> >>>>  }
> >>>>
> >>>> +static bool
> >>>> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> >>>> +                 const struct conn_key *key)
> >>>> +    OVS_REQUIRES(ctb->lock)
> >>>> +{
> >>>> +    struct conn *conn;
> >>>> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> >>>> +
> >>>> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> >>>> +        if (!conn_key_cmp(&conn->key, key)) {
> >>>> +            return true;
> >>>> +        }
> >>>> +    }
> >>>> +    return false;
> >>>> +}
> >>>> +
> >>>>  static void
> >>>>  ct_print_conn_info(const struct conn *c, const char *log_msg,
> >>>>                     enum vlog_level vll, bool force, bool rl_on)
> >>>> @@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn
> *conn,
> >>>>          expectation_clean(ct, &conn->key, ct->hash_basis);
> >>>>      }
> >>>>      ovs_list_remove(&conn->exp_node);
> >>>> -    hmap_remove(&ctb->connections, &conn->node);
> >>>> +
> >>>> +    if (conn_key_present(ct, ctb, &conn->key)) {
> >>>> +        hmap_remove(&ctb->connections, &conn->node);
> >>>> +    } else {
> >>>> +        char *log_msg = xasprintf("conn_clean: conn not present in
> >>> hmap");
> >>>> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> >>>> +        free(log_msg);
> >>>> +    }
> >>>>      atomic_count_dec(&ct->n_conn);
> >>>>      if (conn->nat_info) {
> >>>>          nat_clean(ct, conn, ctb);
> >>>> @@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
> >>>> conntrack_bucket *ctb,
> >>>>
> >>>>      for (unsigned i = 0; i < N_CT_TM; i++) {
> >>>>          LIST_FOR_EACH_SAFE (conn, next, exp_node,
> &ctb->exp_lists[i]) {
> >>>> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> >>>> -                if (!conn_expired(conn, now) || count >= limit) {
> >>>> -                    min_expiration = MIN(min_expiration,
> >>> conn->expiration);
> >>>> -                    if (count >= limit) {
> >>>> -                        /* Do not check other lists. */
> >>>> -                        COVERAGE_INC(conntrack_long_cleanup);
> >>>> -                        return min_expiration;
> >>>> -                    }
> >>>> -                    break;
> >>>> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> >>>> +            if (!conn_expired(conn, now) || count >= limit) {
> >>>> +                min_expiration = MIN(min_expiration,
> conn->expiration);
> >>>> +                if (count >= limit) {
> >>>> +                    /* Do not check other lists. */
> >>>> +                    COVERAGE_INC(conntrack_long_cleanup);
> >>>> +                    return min_expiration;
> >>>>                  }
> >>>> -                conn_clean(ct, conn, ctb);
> >>>> -                count++;
> >>>> +                break;
> >>>>              }
> >>>> +            conn_clean(ct, conn, ctb);
> >>>> +            count++;
> >>>>          }
> >>>>      }
> >>>>      return min_expiration;
> >>>> @@ -2540,16 +2562,18 @@ int
> >>>>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> >>>>  {
> >>>>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> >>>> -        struct conn *conn, *next;
> >>>> -
> >>>> -        ct_lock_lock(&ct->buckets[i].lock);
> >>>> -        HMAP_FOR_EACH_SAFE (conn, next, node,
> >>> &ct->buckets[i].connections)
> >>>> {
> >>>> -            if ((!zone || *zone == conn->key.zone) &&
> >>>> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> >>>> -                conn_clean(ct, conn, &ct->buckets[i]);
> >>>> +        struct conntrack_bucket *ctb = &ct->buckets[i];
> >>>> +        ct_lock_lock(&ctb->lock);
> >>>> +        for (unsigned j = 0; j < N_CT_TM; j++) {
> >>>> +            struct conn *conn, *next;
> >>>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
> >>> &ctb->exp_lists[j]) {
> >>>> +                if (!zone || *zone == conn->key.zone) {
> >>>> +                    ovs_assert(conn->conn_type ==
> >>> CT_CONN_TYPE_DEFAULT);
> >>>> +                    conn_clean(ct, conn, ctb);
> >>>> +                }
> >>>>              }
> >>>>          }
> >>>> -        ct_lock_unlock(&ct->buckets[i].lock);
> >>>> +        ct_lock_unlock(&ctb->lock);
> >>>>      }
> >>>>
> >>>>      return 0;
> >>>> (END)
> >>>>
> >>>>
> >>>> On Wed, Mar 6, 2019 at 11:33 PM solomon <[email protected]>
> >>> wrote:
> >>>>
> >>>>> Darrell Ball wrote:
> >>>>>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
> >>>>> &ctb->exp_lists[j]) {
> >>>>>> +                if (!zone || *zone == conn->key.zone) {
> >>>>>> +                    ovs_assert(conn->conn_type ==
> >>> CT_CONN_TYPE_DEFAULT);
> >>>>>
> >>>>> why do we need this assert?
> >>>>> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
> >>>>> CT_CONN_TYPE_UN_NAT in nat_clean() like following:
> >>>>> +                if ((!zone || *zone == conn->key.zone) &&
> >>>>> +                    (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> >>>>> +                    //ovs_assert(conn->conn_type ==
> >>> CT_CONN_TYPE_DEFAULT);
> >>>>>
> >>>>>
> >>>>> with the above code, catch an another panic which in ct_clean thread.
> >>>>> I have see the same panic without changeing the code.
> >>>>> Both ct_clean and flush-conntrack, can catch the bad bucket value.
> >>>>>
> >>>>> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
> >>>>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
> >>>>> 287         while (*bucket != node) {
> >>>>> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
> >>>>> (gdb) bt
> >>>>> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
> >>>>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
> >>>>> #1  conn_clean (ct=ct@entry=0x7f9498700d98, conn=0x7f871bdc41b0,
> >>>>> ctb=ctb@entry=0x7f9498701c38) at lib/conntrack.c:815
> >>>>> #2  0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
> >>>>> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
> >>>>> #3  conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
> >>>>> lib/conntrack.c:1433
> >>>>> #4  clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
> >>>>> #5  0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
> >>>>> lib/ovs-thread.c:354
> >>>>> #6  0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
> >>>>> pthread_create.c:333
> >>>>> #7  0x00007f9496d09acf in clone () at
> >>>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> >>>>> (gdb) p bucket
> >>>>> $1 = (struct hmap_node **) 0x8  <====== why is bucket not a point
> >>> value?
> >>>>> (gdb) p *(struct hmap *) 0x7f9498701c68
> >>>>> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
> >>>>> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
> >>>>> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
> >>>>> (gdb) p 203059667&32767
> >>>>> $4 = 29139
> >>>>> (gdb) p &hmap->buckets[29139]
> >>>>> $5 = (struct hmap_node **) 0x7f8609fc8a98
> >>>>>
> >>>>>
> >>>>>> +                    conn_clean(ct, conn, ctb);
> >>>>>> +                }
> >>>>>>              }
> >>>>>>          }
> >>>>>> -        ct_lock_unlock(&ct->buckets[i].lock);
> >>>>>> +        ct_lock_unlock(&ctb->lock);
> >>>>>>      }
> >>>>>>
> >>>>>>      return 0;
> >>>>>>
> >>>>>>
> >>>>>> which yields conntrack_flush(...) as
> >>>>>>
> >>>>>> int
> >>>>>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> >>>>>> {
> >>>>>>     for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> >>>>>>         struct conntrack_bucket *ctb = &ct->buckets[i];
> >>>>>>         ct_lock_lock(&ctb->lock);
> >>>>>>         for (unsigned j = 0; j < N_CT_TM; j++) {
> >>>>>>             struct conn *conn, *next;
> >>>>>>             LIST_FOR_EACH_SAFE (conn, next, exp_node,
> >>>>> &ctb->exp_lists[j]) {
> >>>>>>                 if (!zone || *zone == conn->key.zone) {
> >>>>>>                     ovs_assert(conn->conn_type ==
> >>> CT_CONN_TYPE_DEFAULT);
> >>>>>>                     conn_clean(ct, conn, ctb);
> >>>>>>                 }
> >>>>>>             }
> >>>>>>         }
> >>>>>>         ct_lock_unlock(&ctb->lock);
> >>>>>>     }
> >>>>>>
> >>>>>>     return 0;
> >>>>>> }
> >>>>>>
> >>>>>> Thanks Darrell
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On Wed, Mar 6, 2019 at 8:06 PM solomon <[email protected]>
> >>> wrote:
> >>>>>>
> >>>>>>>
> >>>>>>>    When i test conntrack, i catch a panic of ovs.
> >>>>>>>
> >>>>>>> Core was generated by `ovs-vswitchd
> unix:/var/run/openvswitch/db.sock
> >>>>>>> -vconsole:emer -vsyslog:err -vfi'.
> >>>>>>> Program terminated with signal SIGSEGV, Segmentation fault.
> >>>>>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
> >>>>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
> >>>>>>> 287         while (*bucket != node) {
> >>>>>>> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
> >>>>>>> (gdb) bt
> >>>>>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
> >>>>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
> >>>>>>> #1  conn_clean (ct=ct@entry=0x7f8178c75d98, conn=0x7f734cde0170,
> >>>>>>> ctb=ctb@entry=0x7f8178c7fd40) at lib/conntrack.c:815
> >>>>>>> #2  0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98,
> >>> zone=0x0)
> >>>>> at
> >>>>>>> lib/conntrack.c:2549
> >>>>>>> #3  0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
> >>>>>>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
> >>>>>>> #4  0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc@entry
> =1,
> >>>>>>> argv=argv@entry=0x5605c697ec30, dpctl_p=dpctl_p@entry
> >>> =0x7fffee718110)
> >>>>> at
> >>>>>>> lib/dpctl.c:1388
> >>>>>>> #5  0x00005605c5cdeb78 in dpctl_unixctl_handler
> (conn=0x5605c6959ca0,
> >>>>>>> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
> >>>>> <dpctl_flush_conntrack>) at
> >>>>>>> lib/dpctl.c:2312
> >>>>>>> #6  0x00005605c5c806ea in process_command (request=<optimized out>,
> >>>>>>> conn=0x5605c6959ca0) at lib/unixctl.c:308
> >>>>>>> #7  run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
> >>>>>>> #8  unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
> >>>>>>> #9  0x00005605c5804217 in main (argc=<optimized out>,
> argv=<optimized
> >>>>>>> out>) at vswitchd/ovs-vswitchd.c:126
> >>>>>>>
> >>>>>>>
> >>>>>>> Environment:
> >>>>>>> ovs-2.10.1
> >>>>>>> dpdk-18.0.2.2
> >>>>>>>
> >>>>>>> How-To-Repeat:
> >>>>>>> 1. configure ovs with snat aciton.
> >>>>>>>
> >>>>>>> ovs-ofctl  -O OpenFlow15 add-group $br_name "group_id=1,
> type=select,
> >>>>>>> selection_method=hash
> >>>>>>>
> >>>>>
> >>>
> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
> >>>>>>> "
> >>>>>>>
> >>>>>>> 2. syn-ddos send tcp syn packet to generate connection tracks.
> >>>>>>> 3.
> >>>>>>> #   ovs-appctl dpctl/ct-get-nconns
> >>>>>>> 2063993
> >>>>>>> #   ovs-appctl dpctl/flush-conntrack
> >>>>>>>
> >>>>>>> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
> >>>>>>> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
> >>>>>>> ovs-appctl: ovs-vswitchd: transaction error (End of file)
> >>>>>>>
> >>>>>>>
> >>>>>>> --
> >>>>>>> Thanks
> >>>>>>> Solomon
> >>>>>>>
> >>>>>>
> >>>>>
> >>>>> --
> >>>>>
> >>>>> Thanks
> >>>>> Solomon
> >>>>>
> >>>>
> >>>
> >>> --
> >>>
> >>> Thanks
> >>> Li Wei
> >>>
> >>
> >
>
> --
>
> Thanks
> Li Wei
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to