hi Darrell:
With your new patch(see attached file), still panic both in conntrack_flush()
and sweep_bucket().
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007f2d9af7942a in __GI_abort () at abort.c:89
#2 0x00007f2d9afb5c00 in __libc_message (do_abort=do_abort@entry=2,
fmt=fmt@entry=0x7f2d9b0aad98 "*** Error in `%s': %s: 0x%s ***\n")
at ../sysdeps/posix/libc_fatal.c:175
#3 0x00007f2d9afbbfc6 in malloc_printerr (action=3, str=0x7f2d9b0aaef0 "double
free or corruption (fasttop)", ptr=<optimized out>, ar_ptr=<optimized out>)
at malloc.c:5049
#4 0x00007f2d9afbc80e in _int_free (av=0x7f2698000020, p=0x7f1fcbb78440,
have_lock=0) at malloc.c:3905
#5 0x000055a2c3e0df90 in delete_conn (conn=conn@entry=0x7f1fcbb78340) at
lib/conntrack.c:2419
#6 0x000055a2c3e0f31c in nat_clean (ctb=0x7f2d9ca2cd90, conn=0x7f1fcbb78340,
ct=0x7f2d9ca24d98) at lib/conntrack.c:843
#7 conn_clean (ct=ct@entry=0x7f2d9ca24d98, conn=0x7f1fcbb78340,
ctb=ctb@entry=0x7f2d9ca2cd90) at lib/conntrack.c:869
#8 0x000055a2c3e14820 in conntrack_flush (ct=0x7f2d9ca24d98, zone=0x0) at
lib/conntrack.c:2613
#9 0x000055a2c3cf9b39 in ct_dpif_flush (dpif=0x55a2c4e7b6f0,
zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
#10 0x000055a2c3e18970 in dpctl_flush_conntrack (argc=argc@entry=1,
argv=argv@entry=0x55a2c4dd2550, dpctl_p=dpctl_p@entry=0x7ffd9fda7600) at
lib/dpctl.c:1388
#11 0x000055a2c3e15d48 in dpctl_unixctl_handler (conn=0x55a2c4d9cf50, argc=1,
argv=0x55a2c4dd2550, aux=0x55a2c3e187e0 <dpctl_flush_conntrack>) at
lib/dpctl.c:2312
#12 0x000055a2c3db76ea in process_command (request=<optimized out>,
conn=0x55a2c4d9cf50) at lib/unixctl.c:308
#13 run_connection (conn=0x55a2c4d9cf50) at lib/unixctl.c:342
#14 unixctl_server_run (server=0x55a2c4d94230) at lib/unixctl.c:393
#15 0x000055a2c393b217 in main (argc=<optimized out>, argv=<optimized out>) at
vswitchd/ovs-vswitchd.c:126
# cat /var/log/openvswitch/ovs-vswitchd.log|grep conntrack
2019-03-11T03:36:31.293Z|00001|conntrack(pmd61)|WARN|Unusual condition for
un_nat conn create: nat_conn_key_node/rev_conn (nil)/(nil): src ip
222.15.63.163 dst ip 172.89.78.140 rev src ip 2.128.173.3 rev dst ip
222.15.63.163 src/dst ports 80/10623 rev src/dst ports 10623/80 zone/rev zone
0/0 nw_proto/rev nw_proto 6/6
2019-03-11T03:36:33.599Z|00001|conntrack(pmd82)|WARN|Unusual condition for
un_nat conn create: nat_conn_key_node/rev_conn (nil)/(nil): src ip
222.15.63.163 dst ip 172.31.235.224 rev src ip 2.143.150.201 rev dst ip
222.15.63.163 src/dst ports 80/17937 rev src/dst ports 17937/80 zone/rev zone
0/0 nw_proto/rev nw_proto 6/6
2019-03-11T03:36:33.907Z|00054|coverage(ct_clean3)|INFO|conntrack_full
13374.0/sec 61885.550/sec 1031.4258/sec total: 3718506
2019-03-11T03:36:33.907Z|00055|coverage(ct_clean3)|INFO|conntrack_long_cleanup
0.0/sec 0.000/sec 0.0000/sec total: 188
2019-03-11T03:36:33.907Z|00059|poll_loop(ct_clean3)|INFO|wakeup due to 0-ms
timeout at lib/conntrack.c:1554 (64% CPU usage)
2019-03-11T03:36:34.004Z|00221|conntrack|WARN|conn_clean: conn not present in
hmap: src ip 240.132.183.203 dst ip 222.15.63.163 rev src ip 222.15.63.163 rev
dst ip 172.68.216.36 src/dst ports 31344/80 rev src/dst ports 80/31344 zone/rev
zone 0/0 nw_proto/rev nw_proto 6/6
Darrell Ball wrote:
> Hi Solomon
>
> There was a typo in nat_clean(...)
>
> ct_rwlock_unlock(&ct->resources_lock);
> ct_lock_unlock(&ctb->lock);
> uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> - unsigned bucket_rev_conn =
> - hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
> + unsigned bucket_rev_conn = hash_to_bucket(hash);
> ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
> ct_rwlock_wrlock(&ct->resources_lock);
>
> I also cleaned up the patch a little more and ended up with:
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 5410ab4..68d9816 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
> conn_key *key2)
> return 1;
> }
>
> +static bool
> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> + const struct conn_key *key)
> + OVS_REQUIRES(ctb->lock)
> +{
> + struct conn *conn;
> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +
> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> + if (!conn_key_cmp(&conn->key, key)) {
> + return true;
> + }
> + }
> + return false;
> +}
> +
> static void
> ct_print_conn_info(const struct conn *c, const char *log_msg,
> enum vlog_level vll, bool force, bool rl_on)
> @@ -738,29 +754,53 @@ un_nat_packet(struct dp_packet *pkt, const struct
> conn *conn,
> }
> }
>
> -/* Typical usage of this helper is in non per-packet code;
> - * this is because the bucket lock needs to be held for lookup
> - * and a hash would have already been needed. Hence, this function
> - * is just intended for code clarity. */
> +/* This function is called with the bucket lock held. */
> static struct conn *
> -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
> now)
> +conn_lookup_any(const struct conn_key *key,
> + const struct conntrack_bucket *ctb, uint32_t hash)
> {
> - struct conn_lookup_ctx ctx;
> - ctx.conn = NULL;
> - ctx.key = *key;
> - ctx.hash = conn_key_hash(key, ct->hash_basis);
> - unsigned bucket = hash_to_bucket(ctx.hash);
> - conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> - return ctx.conn;
> + struct conn *conn = NULL;
> +
> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> + if (!conn_key_cmp(&conn->key, key)) {
> + break;
> + }
> + if (!conn_key_cmp(&conn->rev_key, key)) {
> + break;
> + }
> + }
> + return conn;
> +}
> +
> +/* This function is called with the bucket lock held. */
> +static struct conn *
> +conn_lookup_unnat(const struct conn_key *key,
> + const struct conntrack_bucket *ctb, uint32_t hash)
> +{
> + struct conn *conn = NULL;
> +
> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> + if (!conn_key_cmp(&conn->key, key)
> + && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
> + break;
> + }
> + }
> + return conn;
> }
>
> static void
> conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
> long long now, int seq_skew, bool seq_skew_dir)
> {
> - unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
> + unsigned bucket = hash_to_bucket(hash);
> ct_lock_lock(&ct->buckets[bucket].lock);
> - struct conn *conn = conn_lookup(ct, key, now);
> + struct conn_lookup_ctx ctx;
> + ctx.key = *key;
> + ctx.hash = hash;
> + conn_key_lookup(&ct->buckets[bucket], &ctx, now);
> + struct conn *conn = ctx.conn;
> +
> if (conn && seq_skew) {
> conn->seq_skew = seq_skew;
> conn->seq_skew_dir = seq_skew_dir;
> @@ -777,12 +817,13 @@ nat_clean(struct conntrack *ct, struct conn *conn,
> nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
> ct->hash_basis);
> ct_rwlock_unlock(&ct->resources_lock);
> ct_lock_unlock(&ctb->lock);
> - unsigned bucket_rev_conn =
> - hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
> + uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
> + unsigned bucket_rev_conn = hash_to_bucket(hash);
> ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
> ct_rwlock_wrlock(&ct->resources_lock);
> - long long now = time_msec();
> - struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
> + struct conn *rev_conn = conn_lookup_unnat(&conn->rev_key,
> +
> &ct->buckets[bucket_rev_conn],
> + hash);
> struct nat_conn_key_node *nat_conn_key_node =
> nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
> ct->hash_basis);
> @@ -812,7 +853,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
> expectation_clean(ct, &conn->key, ct->hash_basis);
> }
> ovs_list_remove(&conn->exp_node);
> - hmap_remove(&ctb->connections, &conn->node);
> +
> + /* Temporary debug check. */
> + if (conn_key_present(ct, ctb, &conn->key)) {
> + hmap_remove(&ctb->connections, &conn->node);
> + } else {
> + char *log_msg = xasprintf("conn_clean: conn not present in hmap");
> + ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> + free(log_msg);
> + }
> +
> atomic_count_dec(&ct->n_conn);
> if (conn->nat_info) {
> nat_clean(ct, conn, ctb);
> @@ -1005,7 +1055,7 @@ conn_update_state(struct conntrack *ct, struct
> dp_packet *pkt,
>
> static void
> create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
> - long long now, bool alg_un_nat)
> + bool alg_un_nat)
> {
> struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
> nc->key = conn_for_un_nat_copy->rev_key;
> @@ -1013,7 +1063,9 @@ create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
> uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
> unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
> ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
> - struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
> + struct conn *rev_conn = conn_lookup_any(&nc->key,
> +
> &ct->buckets[un_nat_conn_bucket],
> + un_nat_hash);
>
> if (alg_un_nat) {
> if (!rev_conn) {
> @@ -1022,7 +1074,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
> *conn_for_un_nat_copy,
> } else {
> char *log_msg = xasprintf("Unusual condition for un_nat conn "
> "create for alg: rev_conn %p",
> rev_conn);
> - ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> + ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> free(log_msg);
> free(nc);
> }
> @@ -1030,16 +1082,26 @@ create_un_nat_conn(struct conntrack *ct, struct
> conn *conn_for_un_nat_copy,
> ct_rwlock_rdlock(&ct->resources_lock);
>
> struct nat_conn_key_node *nat_conn_key_node =
> - nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> ct->hash_basis);
> + nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
> ct->hash_basis);
> if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
> - &nc->rev_key) && !rev_conn) {
> - hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> - &nc->node, un_nat_hash);
> + &nc->rev_key)) {
> + if (!rev_conn) {
> + hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
> + &nc->node, un_nat_hash);
> + } else {
> + char *log_msg = xasprintf("NAT conflict; un-nat will not"
> + "happen; likely DOS");
> + ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> + free(log_msg);
> + nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
> + ct->hash_basis);
> + free(nc);
> + }
> } else {
> char *log_msg = xasprintf("Unusual condition for un_nat conn "
> "create: nat_conn_key_node/rev_conn "
> - "%p/%p", nat_conn_key_node,
> rev_conn);
> - ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
> + "%p", nat_conn_key_node);
> + ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
> free(log_msg);
> free(nc);
> }
> @@ -1286,7 +1348,7 @@ process_one(struct conntrack *ct, struct dp_packet
> *pkt,
> ct_lock_unlock(&ct->buckets[bucket].lock);
>
> if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
> - create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
> + create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
> }
>
> handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
> @@ -1383,19 +1445,18 @@ sweep_bucket(struct conntrack *ct, struct
> conntrack_bucket *ctb,
>
> for (unsigned i = 0; i < N_CT_TM; i++) {
> LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> - if (!conn_expired(conn, now) || count >= limit) {
> - min_expiration = MIN(min_expiration, conn->expiration);
> - if (count >= limit) {
> - /* Do not check other lists. */
> - COVERAGE_INC(conntrack_long_cleanup);
> - return min_expiration;
> - }
> - break;
> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> + if (!conn_expired(conn, now) || count >= limit) {
> + min_expiration = MIN(min_expiration, conn->expiration);
> + if (count >= limit) {
> + /* Do not check other lists. */
> + COVERAGE_INC(conntrack_long_cleanup);
> + return min_expiration;
> }
> - conn_clean(ct, conn, ctb);
> - count++;
> + break;
> }
> + conn_clean(ct, conn, ctb);
> + count++;
> }
> }
> return min_expiration;
> @@ -2540,16 +2601,18 @@ int
> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> {
> for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> - struct conn *conn, *next;
> -
> - ct_lock_lock(&ct->buckets[i].lock);
> - HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections)
> {
> - if ((!zone || *zone == conn->key.zone) &&
> - (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> - conn_clean(ct, conn, &ct->buckets[i]);
> + struct conntrack_bucket *ctb = &ct->buckets[i];
> + ct_lock_lock(&ctb->lock);
> + for (unsigned j = 0; j < N_CT_TM; j++) {
> + struct conn *conn, *next;
> + LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
> + if (!zone || *zone == conn->key.zone) {
> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> + conn_clean(ct, conn, ctb);
> + }
> }
> }
> - ct_lock_unlock(&ct->buckets[i].lock);
> + ct_lock_unlock(&ctb->lock);
> }
>
> return 0;
> (END)
>
>
>
>
> On Fri, Mar 8, 2019 at 8:07 AM Darrell Ball <[email protected]> wrote:
>
>> Thanks for confirming Solomon; can you check the following patch on your
>> side as well ?
>>
>> Darrell
>>
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 5410ab4..c6b06d6 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
>> struct conn_key *key2)
>> return 1;
>> }
>>
>> +static bool
>> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
>> + const struct conn_key *key)
>> + OVS_REQUIRES(ctb->lock)
>> +{
>> + struct conn *conn;
>> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
>> +
>> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> + if (!conn_key_cmp(&conn->key, key)) {
>> + return true;
>> + }
>> + }
>> + return false;
>> +}
>> +
>> static void
>> ct_print_conn_info(const struct conn *c, const char *log_msg,
>> enum vlog_level vll, bool force, bool rl_on)
>> @@ -738,29 +754,55 @@ un_nat_packet(struct dp_packet *pkt, const struct
>> conn *conn,
>> }
>> }
>>
>> -/* Typical usage of this helper is in non per-packet code;
>> - * this is because the bucket lock needs to be held for lookup
>> - * and a hash would have already been needed. Hence, this function
>> - * is just intended for code clarity. */
>> static struct conn *
>> -conn_lookup(struct conntrack *ct, const struct conn_key *key, long long
>> now)
>> +conn_lookup_any(struct conntrack *ct, const struct conn_key *key,
>> + uint32_t hash)
>> {
>> - struct conn_lookup_ctx ctx;
>> - ctx.conn = NULL;
>> - ctx.key = *key;
>> - ctx.hash = conn_key_hash(key, ct->hash_basis);
>> - unsigned bucket = hash_to_bucket(ctx.hash);
>> - conn_key_lookup(&ct->buckets[bucket], &ctx, now);
>> - return ctx.conn;
>> + unsigned bucket = hash_to_bucket(hash);
>> + struct conntrack_bucket *ctb = &ct->buckets[bucket];
>> + struct conn *conn = NULL;
>> +
>> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> + if (!conn_key_cmp(&conn->key, key)) {
>> + break;
>> + }
>> + if (!conn_key_cmp(&conn->rev_key, key)) {
>> + break;
>> + }
>> + }
>> + return conn;
>> +}
>> +
>> +static struct conn *
>> +conn_lookup_unnat(struct conntrack *ct, const struct conn_key *key,
>> + uint32_t hash)
>> +{
>> + unsigned bucket = hash_to_bucket(hash);
>> + struct conntrack_bucket *ctb = &ct->buckets[bucket];
>> + struct conn *conn = NULL;
>> +
>> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>> + if (!conn_key_cmp(&conn->key, key)
>> + && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
>> + break;
>> + }
>> + }
>> + return conn;
>> }
>>
>> static void
>> conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
>> long long now, int seq_skew, bool seq_skew_dir)
>> {
>> - unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
>> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
>> + unsigned bucket = hash_to_bucket(hash);
>> ct_lock_lock(&ct->buckets[bucket].lock);
>> - struct conn *conn = conn_lookup(ct, key, now);
>> + struct conn_lookup_ctx ctx;
>> + ctx.key = *key;
>> + ctx.hash = hash;
>> + conn_key_lookup(&ct->buckets[bucket], &ctx, now);
>> + struct conn *conn = ctx.conn;
>> +
>> if (conn && seq_skew) {
>> conn->seq_skew = seq_skew;
>> conn->seq_skew_dir = seq_skew_dir;
>> @@ -777,12 +819,12 @@ nat_clean(struct conntrack *ct, struct conn *conn,
>> nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key,
>> ct->hash_basis);
>> ct_rwlock_unlock(&ct->resources_lock);
>> ct_lock_unlock(&ctb->lock);
>> + uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
>> unsigned bucket_rev_conn =
>> - hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
>> + hash_to_bucket(conn_key_hash(&conn->rev_key, hash));
>> ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
>> ct_rwlock_wrlock(&ct->resources_lock);
>> - long long now = time_msec();
>> - struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
>> + struct conn *rev_conn = conn_lookup_unnat(ct, &conn->rev_key, hash);
>> struct nat_conn_key_node *nat_conn_key_node =
>> nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
>> ct->hash_basis);
>> @@ -812,7 +854,16 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>> expectation_clean(ct, &conn->key, ct->hash_basis);
>> }
>> ovs_list_remove(&conn->exp_node);
>> - hmap_remove(&ctb->connections, &conn->node);
>> +
>> + /* Temporary debug check. */
>> + if (conn_key_present(ct, ctb, &conn->key)) {
>> + hmap_remove(&ctb->connections, &conn->node);
>> + } else {
>> + char *log_msg = xasprintf("conn_clean: conn not present in hmap");
>> + ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
>> + free(log_msg);
>> + }
>> +
>> atomic_count_dec(&ct->n_conn);
>> if (conn->nat_info) {
>> nat_clean(ct, conn, ctb);
>> @@ -1005,7 +1056,7 @@ conn_update_state(struct conntrack *ct, struct
>> dp_packet *pkt,
>>
>> static void
>> create_un_nat_conn(struct conntrack *ct, struct conn
>> *conn_for_un_nat_copy,
>> - long long now, bool alg_un_nat)
>> + bool alg_un_nat)
>> {
>> struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
>> nc->key = conn_for_un_nat_copy->rev_key;
>> @@ -1013,7 +1064,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
>> *conn_for_un_nat_copy,
>> uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
>> unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
>> ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
>> - struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
>> + struct conn *rev_conn = conn_lookup_any(ct, &nc->key, un_nat_hash);
>>
>> if (alg_un_nat) {
>> if (!rev_conn) {
>> @@ -1022,7 +1073,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
>> *conn_for_un_nat_copy,
>> } else {
>> char *log_msg = xasprintf("Unusual condition for un_nat conn "
>> "create for alg: rev_conn %p",
>> rev_conn);
>> - ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
>> + ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>> free(log_msg);
>> free(nc);
>> }
>> @@ -1030,16 +1081,26 @@ create_un_nat_conn(struct conntrack *ct, struct
>> conn *conn_for_un_nat_copy,
>> ct_rwlock_rdlock(&ct->resources_lock);
>>
>> struct nat_conn_key_node *nat_conn_key_node =
>> - nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
>> ct->hash_basis);
>> + nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key,
>> ct->hash_basis);
>> if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
>> - &nc->rev_key) && !rev_conn) {
>> - hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
>> - &nc->node, un_nat_hash);
>> + &nc->rev_key)) {
>> + if (!rev_conn) {
>> + hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
>> + &nc->node, un_nat_hash);
>> + } else {
>> + char *log_msg = xasprintf("NAT conflict; un-nat will not"
>> + "happen; likely DOS");
>> + ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>> + free(log_msg);
>> + nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
>> + ct->hash_basis);
>> + free(nc);
>> + }
>> } else {
>> char *log_msg = xasprintf("Unusual condition for un_nat conn "
>> "create: nat_conn_key_node/rev_conn
>> "
>> - "%p/%p", nat_conn_key_node,
>> rev_conn);
>> - ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
>> + "%p", nat_conn_key_node);
>> + ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
>> free(log_msg);
>> free(nc);
>> }
>> @@ -1286,7 +1347,7 @@ process_one(struct conntrack *ct, struct dp_packet
>> *pkt,
>> ct_lock_unlock(&ct->buckets[bucket].lock);
>>
>> if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
>> - create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
>> + create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
>> }
>>
>> handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
>> @@ -1383,19 +1444,18 @@ sweep_bucket(struct conntrack *ct, struct
>> conntrack_bucket *ctb,
>>
>> for (unsigned i = 0; i < N_CT_TM; i++) {
>> LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
>> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>> - if (!conn_expired(conn, now) || count >= limit) {
>> - min_expiration = MIN(min_expiration,
>> conn->expiration);
>> - if (count >= limit) {
>> - /* Do not check other lists. */
>> - COVERAGE_INC(conntrack_long_cleanup);
>> - return min_expiration;
>> - }
>> - break;
>> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>> + if (!conn_expired(conn, now) || count >= limit) {
>> + min_expiration = MIN(min_expiration, conn->expiration);
>> + if (count >= limit) {
>> + /* Do not check other lists. */
>> + COVERAGE_INC(conntrack_long_cleanup);
>> + return min_expiration;
>> }
>> - conn_clean(ct, conn, ctb);
>> - count++;
>> + break;
>> }
>> + conn_clean(ct, conn, ctb);
>> + count++;
>> }
>> }
>> return min_expiration;
>> @@ -2540,16 +2600,18 @@ int
>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>> {
>> for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>> - struct conn *conn, *next;
>> -
>> - ct_lock_lock(&ct->buckets[i].lock);
>> - HMAP_FOR_EACH_SAFE (conn, next, node,
>> &ct->buckets[i].connections) {
>> - if ((!zone || *zone == conn->key.zone) &&
>> - (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>> - conn_clean(ct, conn, &ct->buckets[i]);
>> + struct conntrack_bucket *ctb = &ct->buckets[i];
>> + ct_lock_lock(&ctb->lock);
>> + for (unsigned j = 0; j < N_CT_TM; j++) {
>> + struct conn *conn, *next;
>> + LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j])
>> {
>> + if (!zone || *zone == conn->key.zone) {
>> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>> + conn_clean(ct, conn, ctb);
>> + }
>> }
>> }
>> - ct_lock_unlock(&ct->buckets[i].lock);
>> + ct_lock_unlock(&ctb->lock);
>> }
>>
>> return 0;
>> (END)
>>
>>
>> On Thu, Mar 7, 2019 at 5:30 PM Li Wei <[email protected]> wrote:
>>
>>> hi darrell:
>>>
>>> I set nat action with
>>> actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>>
>>> With your patch, new double free panic happens in conntrack_flush() and
>>> sweep_bucket():
>>>
>>> ==1st panic==
>>> [Thread debugging using libthread_db enabled]
>>> Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>>> -vconsole:emer -vsyslog:err -vfi'.
>>> Program terminated with signal SIGABRT, Aborted.
>>> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>>> 51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
>>> [Current thread is 1 (Thread 0x7f92b122cb00 (LWP 2387347))]
>>> (gdb) bt
>>> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>>> #1 0x00007f92af62a42a in __GI_abort () at abort.c:89
>>> #2 0x00007f92af666c00 in __libc_message (do_abort=do_abort@entry=2,
>>> fmt=fmt@entry=0x7f92af75bd98 "*** Error in `%s': %s: 0x%s ***\n")
>>> at ../sysdeps/posix/libc_fatal.c:175
>>> #3 0x00007f92af66cfc6 in malloc_printerr (action=3, str=0x7f92af75bef0
>>> "double free or corruption (fasttop)", ptr=<optimized out>,
>>> ar_ptr=<optimized out>)
>>> at malloc.c:5049
>>> #4 0x00007f92af66d80e in _int_free (av=0x7f8bb4000020, p=0x7f8bb70ef770,
>>> have_lock=0) at malloc.c:3905
>>> #5 0x00005622735d3f90 in delete_conn (conn=conn@entry=0x7f8bb70ef670)
>>> at lib/conntrack.c:2380
>>> #6 0x00005622735d52ad in nat_clean (ctb=0x7f92b10da7f0,
>>> conn=0x7f8bb70ef670, ct=0x7f92b10d5d98) at lib/conntrack.c:816
>>> #7 conn_clean (ct=ct@entry=0x7f92b10d5d98, conn=0x7f8bb70ef670,
>>> ctb=ctb@entry=0x7f92b10da7f0) at lib/conntrack.c:842
>>> #8 0x00005622735da710 in conntrack_flush (ct=0x7f92b10d5d98, zone=0x0)
>>> at lib/conntrack.c:2574
>>> #9 0x00005622734bfb39 in ct_dpif_flush (dpif=0x5622758671d0,
>>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
>>> #10 0x00005622735de860 in dpctl_flush_conntrack (argc=argc@entry=1,
>>> argv=argv@entry=0x56227589cc40, dpctl_p=dpctl_p@entry=0x7fff9087fe90) at
>>> lib/dpctl.c:1388
>>> #11 0x00005622735dbc38 in dpctl_unixctl_handler (conn=0x56227589bc90,
>>> argc=1, argv=0x56227589cc40, aux=0x5622735de6d0 <dpctl_flush_conntrack>) at
>>> lib/dpctl.c:2312
>>> #12 0x000056227357d6ea in process_command (request=<optimized out>,
>>> conn=0x56227589bc90) at lib/unixctl.c:308
>>> #13 run_connection (conn=0x56227589bc90) at lib/unixctl.c:342
>>> #14 unixctl_server_run (server=0x5622757af230) at lib/unixctl.c:393
>>> #15 0x0000562273101217 in main (argc=<optimized out>, argv=<optimized
>>> out>) at vswitchd/ovs-vswitchd.c:126
>>>
>>> The debug info in /var/log/openvswitch/ovs-vswitchd.log:
>>> 70 2019-03-08T00:54:31.278Z|00227|conntrack|WARN|conn_clean: conn not
>>> present in hmap: src ip 32.248.14.183 dst ip 222.15.63.163 rev src ip
>>> 222.15.63.163 rev dst ip 172.112.98.138 src/dst ports 54957/80 rev
>>> src/dst ports 80/54957 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>>>
>>> ==sec panic==
>>> (gdb) bt
>>> #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>>> #1 0x00007faece4b642a in __GI_abort () at abort.c:89
>>> #2 0x00007faece4f2c00 in __libc_message (do_abort=do_abort@entry=2,
>>> fmt=fmt@entry=0x7faece5e7d98 "*** Error in `%s': %s: 0x%s ***\n")
>>> at ../sysdeps/posix/libc_fatal.c:175
>>> #3 0x00007faece4f8fc6 in malloc_printerr (action=3, str=0x7faece5e7e60
>>> "double free or corruption (out)", ptr=<optimized out>, ar_ptr=<optimized
>>> out>)
>>> at malloc.c:5049
>>> #4 0x00007faece4f980e in _int_free (av=0x7faece81bb00 <main_arena>,
>>> p=0x7fa11c50ee30, have_lock=0) at malloc.c:3905
>>> #5 0x0000562c63ac82ad in nat_clean (ctb=0x7faecff65cf8,
>>> conn=0x7fa11c50ee40, ct=0x7faecff61d98) at lib/conntrack.c:816
>>> #6 conn_clean (ct=ct@entry=0x7faecff61d98, conn=0x7fa11c50ee40,
>>> ctb=ctb@entry=0x7faecff65cf8) at lib/conntrack.c:842
>>> #7 0x0000562c63ac8639 in sweep_bucket (limit=3906, now=1287899232,
>>> ctb=<optimized out>, ct=0x7faecff61d98) at lib/conntrack.c:1421
>>> #8 conntrack_clean (now=1287899232, ct=0x7faecff61d98) at
>>> lib/conntrack.c:1457
>>> #9 clean_thread_main (f_=0x7faecff61d98) at lib/conntrack.c:1512
>>> #10 0x0000562c63a3f48f in ovsthread_wrapper (aux_=<optimized out>) at
>>> lib/ovs-thread.c:354
>>> #11 0x00007faecef76494 in start_thread (arg=0x7faec77fe700) at
>>> pthread_create.c:333
>>> #12 0x00007faece56aacf in clone () at
>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>>
>>> 2019-03-08T01:15:16.929Z|00055|conntrack(ct_clean3)|WARN|conn_clean: conn
>>> not present in hmap: src ip 2.92.142.188 dst ip 222.15.63.163 rev src ip
>>> 222.15.63.163 rev dst ip 172.116.154.125 src/dst ports 23446/80 rev src/dst
>>> ports 80/23446 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
>>>
>>>
>>> Darrell Ball wrote:
>>>> Thanks for your help Solomon
>>>>
>>>> Can you give the following debug patch a spin ?
>>>> I will continue to try to repo locally.
>>>>
>>>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>>>> index 5410ab4..6968c03 100644
>>>> --- a/lib/conntrack.c
>>>> +++ b/lib/conntrack.c
>>>> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const
>>> struct
>>>> conn_key *key2)
>>>> return 1;
>>>> }
>>>>
>>>> +static bool
>>>> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
>>>> + const struct conn_key *key)
>>>> + OVS_REQUIRES(ctb->lock)
>>>> +{
>>>> + struct conn *conn;
>>>> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
>>>> +
>>>> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
>>>> + if (!conn_key_cmp(&conn->key, key)) {
>>>> + return true;
>>>> + }
>>>> + }
>>>> + return false;
>>>> +}
>>>> +
>>>> static void
>>>> ct_print_conn_info(const struct conn *c, const char *log_msg,
>>>> enum vlog_level vll, bool force, bool rl_on)
>>>> @@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>>>> expectation_clean(ct, &conn->key, ct->hash_basis);
>>>> }
>>>> ovs_list_remove(&conn->exp_node);
>>>> - hmap_remove(&ctb->connections, &conn->node);
>>>> +
>>>> + if (conn_key_present(ct, ctb, &conn->key)) {
>>>> + hmap_remove(&ctb->connections, &conn->node);
>>>> + } else {
>>>> + char *log_msg = xasprintf("conn_clean: conn not present in
>>> hmap");
>>>> + ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
>>>> + free(log_msg);
>>>> + }
>>>> atomic_count_dec(&ct->n_conn);
>>>> if (conn->nat_info) {
>>>> nat_clean(ct, conn, ctb);
>>>> @@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
>>>> conntrack_bucket *ctb,
>>>>
>>>> for (unsigned i = 0; i < N_CT_TM; i++) {
>>>> LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
>>>> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>>>> - if (!conn_expired(conn, now) || count >= limit) {
>>>> - min_expiration = MIN(min_expiration,
>>> conn->expiration);
>>>> - if (count >= limit) {
>>>> - /* Do not check other lists. */
>>>> - COVERAGE_INC(conntrack_long_cleanup);
>>>> - return min_expiration;
>>>> - }
>>>> - break;
>>>> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>>> + if (!conn_expired(conn, now) || count >= limit) {
>>>> + min_expiration = MIN(min_expiration, conn->expiration);
>>>> + if (count >= limit) {
>>>> + /* Do not check other lists. */
>>>> + COVERAGE_INC(conntrack_long_cleanup);
>>>> + return min_expiration;
>>>> }
>>>> - conn_clean(ct, conn, ctb);
>>>> - count++;
>>>> + break;
>>>> }
>>>> + conn_clean(ct, conn, ctb);
>>>> + count++;
>>>> }
>>>> }
>>>> return min_expiration;
>>>> @@ -2540,16 +2562,18 @@ int
>>>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>>> {
>>>> for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>>>> - struct conn *conn, *next;
>>>> -
>>>> - ct_lock_lock(&ct->buckets[i].lock);
>>>> - HMAP_FOR_EACH_SAFE (conn, next, node,
>>> &ct->buckets[i].connections)
>>>> {
>>>> - if ((!zone || *zone == conn->key.zone) &&
>>>> - (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>>>> - conn_clean(ct, conn, &ct->buckets[i]);
>>>> + struct conntrack_bucket *ctb = &ct->buckets[i];
>>>> + ct_lock_lock(&ctb->lock);
>>>> + for (unsigned j = 0; j < N_CT_TM; j++) {
>>>> + struct conn *conn, *next;
>>>> + LIST_FOR_EACH_SAFE (conn, next, exp_node,
>>> &ctb->exp_lists[j]) {
>>>> + if (!zone || *zone == conn->key.zone) {
>>>> + ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>> + conn_clean(ct, conn, ctb);
>>>> + }
>>>> }
>>>> }
>>>> - ct_lock_unlock(&ct->buckets[i].lock);
>>>> + ct_lock_unlock(&ctb->lock);
>>>> }
>>>>
>>>> return 0;
>>>> (END)
>>>>
>>>>
>>>> On Wed, Mar 6, 2019 at 11:33 PM solomon <[email protected]>
>>> wrote:
>>>>
>>>>> Darrell Ball wrote:
>>>>>> + LIST_FOR_EACH_SAFE (conn, next, exp_node,
>>>>> &ctb->exp_lists[j]) {
>>>>>> + if (!zone || *zone == conn->key.zone) {
>>>>>> + ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>>>
>>>>> why do we need this assert?
>>>>> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
>>>>> CT_CONN_TYPE_UN_NAT in nat_clean() like following:
>>>>> + if ((!zone || *zone == conn->key.zone) &&
>>>>> + (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>>>>> + //ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>>>
>>>>>
>>>>> with the above code, catch an another panic which in ct_clean thread.
>>>>> I have see the same panic without changeing the code.
>>>>> Both ct_clean and flush-conntrack, can catch the bad bucket value.
>>>>>
>>>>> #0 0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>>>>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>>>>> 287 while (*bucket != node) {
>>>>> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
>>>>> (gdb) bt
>>>>> #0 0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>>>>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>>>>> #1 conn_clean (ct=ct@entry=0x7f9498700d98, conn=0x7f871bdc41b0,
>>>>> ctb=ctb@entry=0x7f9498701c38) at lib/conntrack.c:815
>>>>> #2 0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
>>>>> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
>>>>> #3 conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
>>>>> lib/conntrack.c:1433
>>>>> #4 clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
>>>>> #5 0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
>>>>> lib/ovs-thread.c:354
>>>>> #6 0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
>>>>> pthread_create.c:333
>>>>> #7 0x00007f9496d09acf in clone () at
>>>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>>>>> (gdb) p bucket
>>>>> $1 = (struct hmap_node **) 0x8 <====== why is bucket not a point
>>> value?
>>>>> (gdb) p *(struct hmap *) 0x7f9498701c68
>>>>> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
>>>>> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
>>>>> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
>>>>> (gdb) p 203059667&32767
>>>>> $4 = 29139
>>>>> (gdb) p &hmap->buckets[29139]
>>>>> $5 = (struct hmap_node **) 0x7f8609fc8a98
>>>>>
>>>>>
>>>>>> + conn_clean(ct, conn, ctb);
>>>>>> + }
>>>>>> }
>>>>>> }
>>>>>> - ct_lock_unlock(&ct->buckets[i].lock);
>>>>>> + ct_lock_unlock(&ctb->lock);
>>>>>> }
>>>>>>
>>>>>> return 0;
>>>>>>
>>>>>>
>>>>>> which yields conntrack_flush(...) as
>>>>>>
>>>>>> int
>>>>>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>>>>> {
>>>>>> for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>>>>>> struct conntrack_bucket *ctb = &ct->buckets[i];
>>>>>> ct_lock_lock(&ctb->lock);
>>>>>> for (unsigned j = 0; j < N_CT_TM; j++) {
>>>>>> struct conn *conn, *next;
>>>>>> LIST_FOR_EACH_SAFE (conn, next, exp_node,
>>>>> &ctb->exp_lists[j]) {
>>>>>> if (!zone || *zone == conn->key.zone) {
>>>>>> ovs_assert(conn->conn_type ==
>>> CT_CONN_TYPE_DEFAULT);
>>>>>> conn_clean(ct, conn, ctb);
>>>>>> }
>>>>>> }
>>>>>> }
>>>>>> ct_lock_unlock(&ctb->lock);
>>>>>> }
>>>>>>
>>>>>> return 0;
>>>>>> }
>>>>>>
>>>>>> Thanks Darrell
>>>>>>
>>>>>>
>>>>>>
>>>>>> On Wed, Mar 6, 2019 at 8:06 PM solomon <[email protected]>
>>> wrote:
>>>>>>
>>>>>>>
>>>>>>> When i test conntrack, i catch a panic of ovs.
>>>>>>>
>>>>>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>>>>>>> -vconsole:emer -vsyslog:err -vfi'.
>>>>>>> Program terminated with signal SIGSEGV, Segmentation fault.
>>>>>>> #0 0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>>>>> 287 while (*bucket != node) {
>>>>>>> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
>>>>>>> (gdb) bt
>>>>>>> #0 0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>>>>> #1 conn_clean (ct=ct@entry=0x7f8178c75d98, conn=0x7f734cde0170,
>>>>>>> ctb=ctb@entry=0x7f8178c7fd40) at lib/conntrack.c:815
>>>>>>> #2 0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98,
>>> zone=0x0)
>>>>> at
>>>>>>> lib/conntrack.c:2549
>>>>>>> #3 0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
>>>>>>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
>>>>>>> #4 0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc@entry=1,
>>>>>>> argv=argv@entry=0x5605c697ec30, dpctl_p=dpctl_p@entry
>>> =0x7fffee718110)
>>>>> at
>>>>>>> lib/dpctl.c:1388
>>>>>>> #5 0x00005605c5cdeb78 in dpctl_unixctl_handler (conn=0x5605c6959ca0,
>>>>>>> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
>>>>> <dpctl_flush_conntrack>) at
>>>>>>> lib/dpctl.c:2312
>>>>>>> #6 0x00005605c5c806ea in process_command (request=<optimized out>,
>>>>>>> conn=0x5605c6959ca0) at lib/unixctl.c:308
>>>>>>> #7 run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
>>>>>>> #8 unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
>>>>>>> #9 0x00005605c5804217 in main (argc=<optimized out>, argv=<optimized
>>>>>>> out>) at vswitchd/ovs-vswitchd.c:126
>>>>>>>
>>>>>>>
>>>>>>> Environment:
>>>>>>> ovs-2.10.1
>>>>>>> dpdk-18.0.2.2
>>>>>>>
>>>>>>> How-To-Repeat:
>>>>>>> 1. configure ovs with snat aciton.
>>>>>>>
>>>>>>> ovs-ofctl -O OpenFlow15 add-group $br_name "group_id=1, type=select,
>>>>>>> selection_method=hash
>>>>>>>
>>>>>
>>> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>>>>>> "
>>>>>>>
>>>>>>> 2. syn-ddos send tcp syn packet to generate connection tracks.
>>>>>>> 3.
>>>>>>> # ovs-appctl dpctl/ct-get-nconns
>>>>>>> 2063993
>>>>>>> # ovs-appctl dpctl/flush-conntrack
>>>>>>>
>>>>>>> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
>>>>>>> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
>>>>>>> ovs-appctl: ovs-vswitchd: transaction error (End of file)
>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Thanks
>>>>>>> Solomon
>>>>>>>
>>>>>>
>>>>>
>>>>> --
>>>>>
>>>>> Thanks
>>>>> Solomon
>>>>>
>>>>
>>>
>>> --
>>>
>>> Thanks
>>> Li Wei
>>>
>>
>
--
Thanks
Li Wei
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 974f985bd..a4a3d177c 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -249,6 +249,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
conn_key *key2)
return 1;
}
+static bool
+conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
+ const struct conn_key *key)
+ OVS_REQUIRES(ctb->lock)
+{
+ struct conn *conn;
+ uint32_t hash = conn_key_hash(key, ct->hash_basis);
+
+ HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+ if (!conn_key_cmp(&conn->key, key)) {
+ return true;
+ }
+ }
+ return false;
+}
+
static void
ct_print_conn_info(const struct conn *c, const char *log_msg,
enum vlog_level vll, bool force, bool rl_on)
@@ -738,29 +754,54 @@ un_nat_packet(struct dp_packet *pkt, const struct conn
*conn,
}
}
-/* Typical usage of this helper is in non per-packet code;
- * this is because the bucket lock needs to be held for lookup
- * and a hash would have already been needed. Hence, this function
- * is just intended for code clarity. */
+/* This function is called with the bucket lock held. */
static struct conn *
-conn_lookup(struct conntrack *ct, const struct conn_key *key, long long now)
+conn_lookup_any(const struct conn_key *key,
+ const struct conntrack_bucket *ctb, uint32_t hash)
{
- struct conn_lookup_ctx ctx;
- ctx.conn = NULL;
- ctx.key = *key;
- ctx.hash = conn_key_hash(key, ct->hash_basis);
- unsigned bucket = hash_to_bucket(ctx.hash);
- conn_key_lookup(&ct->buckets[bucket], &ctx, now);
- return ctx.conn;
+
+ struct conn *conn = NULL;
+ HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+ if (!conn_key_cmp(&conn->key, key)) {
+ break;
+ }
+ if (!conn_key_cmp(&conn->rev_key, key)) {
+ break;
+ }
+ }
+ return conn;
+}
+
+/* This function is called with the bucket lock held. */
+static struct conn *
+conn_lookup_unnat(const struct conn_key *key,
+ const struct conntrack_bucket *ctb, uint32_t hash)
+{
+ struct conn *conn = NULL;
+
+ HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+ if (!conn_key_cmp(&conn->key, key)
+ && conn->conn_type == CT_CONN_TYPE_UN_NAT) {
+ break;
+ }
+ }
+ return conn;
}
static void
conn_seq_skew_set(struct conntrack *ct, const struct conn_key *key,
long long now, int seq_skew, bool seq_skew_dir)
{
- unsigned bucket = hash_to_bucket(conn_key_hash(key, ct->hash_basis));
+ uint32_t hash = conn_key_hash(key, ct->hash_basis);
+ unsigned bucket = hash_to_bucket(hash);
+
ct_lock_lock(&ct->buckets[bucket].lock);
- struct conn *conn = conn_lookup(ct, key, now);
+ struct conn_lookup_ctx ctx;
+ ctx.key = *key;
+ ctx.hash = hash;
+ conn_key_lookup(&ct->buckets[bucket], &ctx, now);
+ struct conn *conn = ctx.conn;
+
if (conn && seq_skew) {
conn->seq_skew = seq_skew;
conn->seq_skew_dir = seq_skew_dir;
@@ -777,12 +818,14 @@ nat_clean(struct conntrack *ct, struct conn *conn,
nat_conn_keys_remove(&ct->nat_conn_keys, &conn->rev_key, ct->hash_basis);
ct_rwlock_unlock(&ct->resources_lock);
ct_lock_unlock(&ctb->lock);
- unsigned bucket_rev_conn =
- hash_to_bucket(conn_key_hash(&conn->rev_key, ct->hash_basis));
+ uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
+ unsigned bucket_rev_conn = hash_to_bucket(hash);
+
ct_lock_lock(&ct->buckets[bucket_rev_conn].lock);
ct_rwlock_wrlock(&ct->resources_lock);
- long long now = time_msec();
- struct conn *rev_conn = conn_lookup(ct, &conn->rev_key, now);
+ struct conn *rev_conn = conn_lookup_unnat(&conn->rev_key,
+ &ct->buckets[bucket_rev_conn],
+ hash);
struct nat_conn_key_node *nat_conn_key_node =
nat_conn_keys_lookup(&ct->nat_conn_keys, &conn->rev_key,
ct->hash_basis);
@@ -812,7 +855,15 @@ conn_clean(struct conntrack *ct, struct conn *conn,
expectation_clean(ct, &conn->key, ct->hash_basis);
}
ovs_list_remove(&conn->exp_node);
- hmap_remove(&ctb->connections, &conn->node);
+ //hmap_remove(&ctb->connections, &conn->node);
+
+ if (conn_key_present(ct, ctb, &conn->key)) {
+ hmap_remove(&ctb->connections, &conn->node);
+ } else {
+ char *log_msg = xasprintf("conn_clean: conn not present in hmap");
+ ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
+ free(log_msg);
+ }
atomic_count_dec(&ct->n_conn);
if (conn->nat_info) {
nat_clean(ct, conn, ctb);
@@ -1005,7 +1056,7 @@ conn_update_state(struct conntrack *ct, struct dp_packet
*pkt,
static void
create_un_nat_conn(struct conntrack *ct, struct conn *conn_for_un_nat_copy,
- long long now, bool alg_un_nat)
+ bool alg_un_nat)
{
struct conn *nc = xmemdup(conn_for_un_nat_copy, sizeof *nc);
nc->key = conn_for_un_nat_copy->rev_key;
@@ -1013,7 +1064,8 @@ create_un_nat_conn(struct conntrack *ct, struct conn
*conn_for_un_nat_copy,
uint32_t un_nat_hash = conn_key_hash(&nc->key, ct->hash_basis);
unsigned un_nat_conn_bucket = hash_to_bucket(un_nat_hash);
ct_lock_lock(&ct->buckets[un_nat_conn_bucket].lock);
- struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
+ struct conn *rev_conn = conn_lookup_any(&nc->key,
&ct->buckets[un_nat_conn_bucket],
+ un_nat_hash);
if (alg_un_nat) {
if (!rev_conn) {
@@ -1022,7 +1074,7 @@ create_un_nat_conn(struct conntrack *ct, struct conn
*conn_for_un_nat_copy,
} else {
char *log_msg = xasprintf("Unusual condition for un_nat conn "
"create for alg: rev_conn %p", rev_conn);
- ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+ ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
free(log_msg);
free(nc);
}
@@ -1032,14 +1084,25 @@ create_un_nat_conn(struct conntrack *ct, struct conn
*conn_for_un_nat_copy,
struct nat_conn_key_node *nat_conn_key_node =
nat_conn_keys_lookup(&ct->nat_conn_keys, &nc->key, ct->hash_basis);
if (nat_conn_key_node && !conn_key_cmp(&nat_conn_key_node->value,
- &nc->rev_key) && !rev_conn) {
- hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
- &nc->node, un_nat_hash);
+ &nc->rev_key)) {
+
+ if (!rev_conn) {
+ hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
+ &nc->node, un_nat_hash);
+ } else {
+ char *log_msg = xasprintf("NAT conflict; un-nat will not"
+ "happen; likely DOS");
+ ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
+ free(log_msg);
+ nat_conn_keys_remove(&ct->nat_conn_keys, &nc->key,
+ ct->hash_basis);
+ free(nc);
+ }
} else {
char *log_msg = xasprintf("Unusual condition for un_nat conn "
"create: nat_conn_key_node/rev_conn "
"%p/%p", nat_conn_key_node, rev_conn);
- ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+ ct_print_conn_info(nc, log_msg, VLL_WARN, true, false);
free(log_msg);
free(nc);
}
@@ -1286,7 +1349,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
ct_lock_unlock(&ct->buckets[bucket].lock);
if (is_un_nat_conn_valid(&conn_for_un_nat_copy)) {
- create_un_nat_conn(ct, &conn_for_un_nat_copy, now, !!alg_exp);
+ create_un_nat_conn(ct, &conn_for_un_nat_copy, !!alg_exp);
}
handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info,
@@ -1383,19 +1446,19 @@ sweep_bucket(struct conntrack *ct, struct
conntrack_bucket *ctb,
for (unsigned i = 0; i < N_CT_TM; i++) {
LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
- if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
- if (!conn_expired(conn, now) || count >= limit) {
- min_expiration = MIN(min_expiration, conn->expiration);
- if (count >= limit) {
- /* Do not check other lists. */
- COVERAGE_INC(conntrack_long_cleanup);
- return min_expiration;
- }
- break;
+ ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+ if (!conn_expired(conn, now) || count >= limit) {
+ min_expiration = MIN(min_expiration, conn->expiration);
+ if (count >= limit) {
+ /* Do not check other lists. */
+ COVERAGE_INC(conntrack_long_cleanup);
+ return min_expiration;
+
}
- conn_clean(ct, conn, ctb);
- count++;
- }
+ break;
+ }
+ conn_clean(ct, conn, ctb);
+ count++;
}
}
return min_expiration;
@@ -2540,16 +2603,18 @@ int
conntrack_flush(struct conntrack *ct, const uint16_t *zone)
{
for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
- struct conn *conn, *next;
-
- ct_lock_lock(&ct->buckets[i].lock);
- HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections) {
- if ((!zone || *zone == conn->key.zone) &&
- (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
- conn_clean(ct, conn, &ct->buckets[i]);
+ struct conntrack_bucket *ctb = &ct->buckets[i];
+ ct_lock_lock(&ctb->lock);
+ for (unsigned j = 0; j < N_CT_TM; j++) {
+ struct conn *conn, *next;
+ LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
+ if (!zone || *zone == conn->key.zone) {
+ ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+ conn_clean(ct, conn, ctb);
+ }
}
}
- ct_lock_unlock(&ct->buckets[i].lock);
+ ct_lock_unlock(&ctb->lock);
}
return 0;
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev