Hi Darrell,

I set the NAT action with:
actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)

With your patch, a new double-free panic happens in conntrack_flush() and
sweep_bucket():

==1st panic==
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock 
-vconsole:emer -vsyslog:err -vfi'.
Program terminated with signal SIGABRT, Aborted.
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
[Current thread is 1 (Thread 0x7f92b122cb00 (LWP 2387347))]
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x00007f92af62a42a in __GI_abort () at abort.c:89
#2  0x00007f92af666c00 in __libc_message (do_abort=do_abort@entry=2, 
fmt=fmt@entry=0x7f92af75bd98 "*** Error in `%s': %s: 0x%s ***\n")
    at ../sysdeps/posix/libc_fatal.c:175
#3  0x00007f92af66cfc6 in malloc_printerr (action=3, str=0x7f92af75bef0 "double 
free or corruption (fasttop)", ptr=<optimized out>, ar_ptr=<optimized out>)
    at malloc.c:5049
#4  0x00007f92af66d80e in _int_free (av=0x7f8bb4000020, p=0x7f8bb70ef770, 
have_lock=0) at malloc.c:3905
#5  0x00005622735d3f90 in delete_conn (conn=conn@entry=0x7f8bb70ef670) at 
lib/conntrack.c:2380
#6  0x00005622735d52ad in nat_clean (ctb=0x7f92b10da7f0, conn=0x7f8bb70ef670, 
ct=0x7f92b10d5d98) at lib/conntrack.c:816
#7  conn_clean (ct=ct@entry=0x7f92b10d5d98, conn=0x7f8bb70ef670, 
ctb=ctb@entry=0x7f92b10da7f0) at lib/conntrack.c:842
#8  0x00005622735da710 in conntrack_flush (ct=0x7f92b10d5d98, zone=0x0) at 
lib/conntrack.c:2574
#9  0x00005622734bfb39 in ct_dpif_flush (dpif=0x5622758671d0, 
zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
#10 0x00005622735de860 in dpctl_flush_conntrack (argc=argc@entry=1, 
argv=argv@entry=0x56227589cc40, dpctl_p=dpctl_p@entry=0x7fff9087fe90) at 
lib/dpctl.c:1388
#11 0x00005622735dbc38 in dpctl_unixctl_handler (conn=0x56227589bc90, argc=1, 
argv=0x56227589cc40, aux=0x5622735de6d0 <dpctl_flush_conntrack>) at 
lib/dpctl.c:2312
#12 0x000056227357d6ea in process_command (request=<optimized out>, 
conn=0x56227589bc90) at lib/unixctl.c:308
#13 run_connection (conn=0x56227589bc90) at lib/unixctl.c:342
#14 unixctl_server_run (server=0x5622757af230) at lib/unixctl.c:393
#15 0x0000562273101217 in main (argc=<optimized out>, argv=<optimized out>) at 
vswitchd/ovs-vswitchd.c:126

The debug info in /var/log/openvswitch/ovs-vswitchd.log:
70 2019-03-08T00:54:31.278Z|00227|conntrack|WARN|conn_clean: conn not present 
in hmap: src ip 32.248.14.183 dst ip 222.15.63.163 rev src ip 222.15.63.163 rev 
dst ip     172.112.98.138 src/dst ports 54957/80 rev src/dst ports 80/54957 
zone/rev zone 0/0 nw_proto/rev nw_proto 6/6

==2nd panic==
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x00007faece4b642a in __GI_abort () at abort.c:89
#2  0x00007faece4f2c00 in __libc_message (do_abort=do_abort@entry=2, 
fmt=fmt@entry=0x7faece5e7d98 "*** Error in `%s': %s: 0x%s ***\n")
    at ../sysdeps/posix/libc_fatal.c:175
#3  0x00007faece4f8fc6 in malloc_printerr (action=3, str=0x7faece5e7e60 "double 
free or corruption (out)", ptr=<optimized out>, ar_ptr=<optimized out>)
    at malloc.c:5049
#4  0x00007faece4f980e in _int_free (av=0x7faece81bb00 <main_arena>, 
p=0x7fa11c50ee30, have_lock=0) at malloc.c:3905
#5  0x0000562c63ac82ad in nat_clean (ctb=0x7faecff65cf8, conn=0x7fa11c50ee40, 
ct=0x7faecff61d98) at lib/conntrack.c:816
#6  conn_clean (ct=ct@entry=0x7faecff61d98, conn=0x7fa11c50ee40, 
ctb=ctb@entry=0x7faecff65cf8) at lib/conntrack.c:842
#7  0x0000562c63ac8639 in sweep_bucket (limit=3906, now=1287899232, 
ctb=<optimized out>, ct=0x7faecff61d98) at lib/conntrack.c:1421
#8  conntrack_clean (now=1287899232, ct=0x7faecff61d98) at lib/conntrack.c:1457
#9  clean_thread_main (f_=0x7faecff61d98) at lib/conntrack.c:1512
#10 0x0000562c63a3f48f in ovsthread_wrapper (aux_=<optimized out>) at 
lib/ovs-thread.c:354
#11 0x00007faecef76494 in start_thread (arg=0x7faec77fe700) at 
pthread_create.c:333
#12 0x00007faece56aacf in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:97

2019-03-08T01:15:16.929Z|00055|conntrack(ct_clean3)|WARN|conn_clean: conn not 
present in hmap: src ip 2.92.142.188 dst ip 222.15.63.163 rev src ip 
222.15.63.163 rev dst ip 172.116.154.125 src/dst ports 23446/80 rev src/dst 
ports 80/23446 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6


Darrell Ball wrote:
> Thanks for your help Solomon
> 
> Can you give the following debug patch a spin?
> I will continue to try to reproduce it locally.
> 
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 5410ab4..6968c03 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
> conn_key *key2)
>      return 1;
>  }
> 
> +static bool
> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> +                 const struct conn_key *key)
> +    OVS_REQUIRES(ctb->lock)
> +{
> +    struct conn *conn;
> +    uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +
> +    HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> +        if (!conn_key_cmp(&conn->key, key)) {
> +            return true;
> +        }
> +    }
> +    return false;
> +}
> +
>  static void
>  ct_print_conn_info(const struct conn *c, const char *log_msg,
>                     enum vlog_level vll, bool force, bool rl_on)
> @@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn *conn,
>          expectation_clean(ct, &conn->key, ct->hash_basis);
>      }
>      ovs_list_remove(&conn->exp_node);
> -    hmap_remove(&ctb->connections, &conn->node);
> +
> +    if (conn_key_present(ct, ctb, &conn->key)) {
> +        hmap_remove(&ctb->connections, &conn->node);
> +    } else {
> +        char *log_msg = xasprintf("conn_clean: conn not present in hmap");
> +        ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> +        free(log_msg);
> +    }
>      atomic_count_dec(&ct->n_conn);
>      if (conn->nat_info) {
>          nat_clean(ct, conn, ctb);
> @@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
> conntrack_bucket *ctb,
> 
>      for (unsigned i = 0; i < N_CT_TM; i++) {
>          LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> -            if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> -                if (!conn_expired(conn, now) || count >= limit) {
> -                    min_expiration = MIN(min_expiration, conn->expiration);
> -                    if (count >= limit) {
> -                        /* Do not check other lists. */
> -                        COVERAGE_INC(conntrack_long_cleanup);
> -                        return min_expiration;
> -                    }
> -                    break;
> +            ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> +            if (!conn_expired(conn, now) || count >= limit) {
> +                min_expiration = MIN(min_expiration, conn->expiration);
> +                if (count >= limit) {
> +                    /* Do not check other lists. */
> +                    COVERAGE_INC(conntrack_long_cleanup);
> +                    return min_expiration;
>                  }
> -                conn_clean(ct, conn, ctb);
> -                count++;
> +                break;
>              }
> +            conn_clean(ct, conn, ctb);
> +            count++;
>          }
>      }
>      return min_expiration;
> @@ -2540,16 +2562,18 @@ int
>  conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>  {
>      for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> -        struct conn *conn, *next;
> -
> -        ct_lock_lock(&ct->buckets[i].lock);
> -        HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections)
> {
> -            if ((!zone || *zone == conn->key.zone) &&
> -                (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> -                conn_clean(ct, conn, &ct->buckets[i]);
> +        struct conntrack_bucket *ctb = &ct->buckets[i];
> +        ct_lock_lock(&ctb->lock);
> +        for (unsigned j = 0; j < N_CT_TM; j++) {
> +            struct conn *conn, *next;
> +            LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
> +                if (!zone || *zone == conn->key.zone) {
> +                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> +                    conn_clean(ct, conn, ctb);
> +                }
>              }
>          }
> -        ct_lock_unlock(&ct->buckets[i].lock);
> +        ct_lock_unlock(&ctb->lock);
>      }
> 
>      return 0;
> (END)
> 
> 
> On Wed, Mar 6, 2019 at 11:33 PM solomon <[email protected]> wrote:
> 
>> Darrell Ball wrote:
>>> +            LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> &ctb->exp_lists[j]) {
>>> +                if (!zone || *zone == conn->key.zone) {
>>> +                    ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>
>> Why do we need this assert?
>> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
>> CT_CONN_TYPE_UN_NAT in nat_clean(), like the following:
>> +                if ((!zone || *zone == conn->key.zone) &&
>> +                    (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>> +                    //ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>
>>
>> With the above code, I caught another panic, this time in the ct_clean thread.
>> I have seen the same panic without changing the code.
>> Both ct_clean and flush-conntrack can hit the bad bucket value.
>>
>> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>> 287         while (*bucket != node) {
>> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
>> (gdb) bt
>> #0  0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>> #1  conn_clean (ct=ct@entry=0x7f9498700d98, conn=0x7f871bdc41b0,
>> ctb=ctb@entry=0x7f9498701c38) at lib/conntrack.c:815
>> #2  0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
>> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
>> #3  conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
>> lib/conntrack.c:1433
>> #4  clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
>> #5  0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
>> lib/ovs-thread.c:354
>> #6  0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
>> pthread_create.c:333
>> #7  0x00007f9496d09acf in clone () at
>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> (gdb) p bucket
>> $1 = (struct hmap_node **) 0x8  <====== why is bucket not a valid pointer value?
>> (gdb) p *(struct hmap *) 0x7f9498701c68
>> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
>> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
>> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
>> (gdb) p 203059667&32767
>> $4 = 29139
>> (gdb) p &hmap->buckets[29139]
>> $5 = (struct hmap_node **) 0x7f8609fc8a98
>>
>>
>>> +                    conn_clean(ct, conn, ctb);
>>> +                }
>>>              }
>>>          }
>>> -        ct_lock_unlock(&ct->buckets[i].lock);
>>> +        ct_lock_unlock(&ctb->lock);
>>>      }
>>>
>>>      return 0;
>>>
>>>
>>> which yields conntrack_flush(...) as
>>>
>>> int
>>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>> {
>>>     for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>>>         struct conntrack_bucket *ctb = &ct->buckets[i];
>>>         ct_lock_lock(&ctb->lock);
>>>         for (unsigned j = 0; j < N_CT_TM; j++) {
>>>             struct conn *conn, *next;
>>>             LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> &ctb->exp_lists[j]) {
>>>                 if (!zone || *zone == conn->key.zone) {
>>>                     ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>>                     conn_clean(ct, conn, ctb);
>>>                 }
>>>             }
>>>         }
>>>         ct_lock_unlock(&ctb->lock);
>>>     }
>>>
>>>     return 0;
>>> }
>>>
>>> Thanks Darrell
>>>
>>>
>>>
>>> On Wed, Mar 6, 2019 at 8:06 PM solomon <[email protected]> wrote:
>>>
>>>>
>>>>    When I test conntrack, I catch a panic in OVS.
>>>>
>>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>>>> -vconsole:emer -vsyslog:err -vfi'.
>>>> Program terminated with signal SIGSEGV, Segmentation fault.
>>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>> 287         while (*bucket != node) {
>>>> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
>>>> (gdb) bt
>>>> #0  0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>> #1  conn_clean (ct=ct@entry=0x7f8178c75d98, conn=0x7f734cde0170,
>>>> ctb=ctb@entry=0x7f8178c7fd40) at lib/conntrack.c:815
>>>> #2  0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98, zone=0x0)
>> at
>>>> lib/conntrack.c:2549
>>>> #3  0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
>>>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
>>>> #4  0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc@entry=1,
>>>> argv=argv@entry=0x5605c697ec30, dpctl_p=dpctl_p@entry=0x7fffee718110)
>> at
>>>> lib/dpctl.c:1388
>>>> #5  0x00005605c5cdeb78 in dpctl_unixctl_handler (conn=0x5605c6959ca0,
>>>> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
>> <dpctl_flush_conntrack>) at
>>>> lib/dpctl.c:2312
>>>> #6  0x00005605c5c806ea in process_command (request=<optimized out>,
>>>> conn=0x5605c6959ca0) at lib/unixctl.c:308
>>>> #7  run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
>>>> #8  unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
>>>> #9  0x00005605c5804217 in main (argc=<optimized out>, argv=<optimized
>>>> out>) at vswitchd/ovs-vswitchd.c:126
>>>>
>>>>
>>>> Environment:
>>>> ovs-2.10.1
>>>> dpdk-18.0.2.2
>>>>
>>>> How-To-Repeat:
>>>> 1. Configure OVS with an SNAT action.
>>>>
>>>> ovs-ofctl  -O OpenFlow15 add-group $br_name "group_id=1, type=select,
>>>> selection_method=hash
>>>>
>> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>>> "
>>>>
>>>> 2. syn-ddos sends TCP SYN packets to generate connection tracking entries.
>>>> 3.
>>>> #   ovs-appctl dpctl/ct-get-nconns
>>>> 2063993
>>>> #   ovs-appctl dpctl/flush-conntrack
>>>>
>>>> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
>>>> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
>>>> ovs-appctl: ovs-vswitchd: transaction error (End of file)
>>>>
>>>>
>>>> --
>>>> Thanks
>>>> Solomon
>>>>
>>>
>>
>> --
>>
>> Thanks
>> Solomon
>>
> 

-- 

Thanks
Li Wei
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to