hi darrell:
I set nat action with
actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
With your patch, a new double-free panic happens in conntrack_flush() and
sweep_bucket():
==1st panic==
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
-vconsole:emer -vsyslog:err -vfi'.
Program terminated with signal SIGABRT, Aborted.
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
[Current thread is 1 (Thread 0x7f92b122cb00 (LWP 2387347))]
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007f92af62a42a in __GI_abort () at abort.c:89
#2 0x00007f92af666c00 in __libc_message (do_abort=do_abort@entry=2,
fmt=fmt@entry=0x7f92af75bd98 "*** Error in `%s': %s: 0x%s ***\n")
at ../sysdeps/posix/libc_fatal.c:175
#3 0x00007f92af66cfc6 in malloc_printerr (action=3, str=0x7f92af75bef0 "double
free or corruption (fasttop)", ptr=<optimized out>, ar_ptr=<optimized out>)
at malloc.c:5049
#4 0x00007f92af66d80e in _int_free (av=0x7f8bb4000020, p=0x7f8bb70ef770,
have_lock=0) at malloc.c:3905
#5 0x00005622735d3f90 in delete_conn (conn=conn@entry=0x7f8bb70ef670) at
lib/conntrack.c:2380
#6 0x00005622735d52ad in nat_clean (ctb=0x7f92b10da7f0, conn=0x7f8bb70ef670,
ct=0x7f92b10d5d98) at lib/conntrack.c:816
#7 conn_clean (ct=ct@entry=0x7f92b10d5d98, conn=0x7f8bb70ef670,
ctb=ctb@entry=0x7f92b10da7f0) at lib/conntrack.c:842
#8 0x00005622735da710 in conntrack_flush (ct=0x7f92b10d5d98, zone=0x0) at
lib/conntrack.c:2574
#9 0x00005622734bfb39 in ct_dpif_flush (dpif=0x5622758671d0,
zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
#10 0x00005622735de860 in dpctl_flush_conntrack (argc=argc@entry=1,
argv=argv@entry=0x56227589cc40, dpctl_p=dpctl_p@entry=0x7fff9087fe90) at
lib/dpctl.c:1388
#11 0x00005622735dbc38 in dpctl_unixctl_handler (conn=0x56227589bc90, argc=1,
argv=0x56227589cc40, aux=0x5622735de6d0 <dpctl_flush_conntrack>) at
lib/dpctl.c:2312
#12 0x000056227357d6ea in process_command (request=<optimized out>,
conn=0x56227589bc90) at lib/unixctl.c:308
#13 run_connection (conn=0x56227589bc90) at lib/unixctl.c:342
#14 unixctl_server_run (server=0x5622757af230) at lib/unixctl.c:393
#15 0x0000562273101217 in main (argc=<optimized out>, argv=<optimized out>) at
vswitchd/ovs-vswitchd.c:126
The debug info in /var/log/openvswitch/ovs-vswitchd.log:
70 2019-03-08T00:54:31.278Z|00227|conntrack|WARN|conn_clean: conn not present
in hmap: src ip 32.248.14.183 dst ip 222.15.63.163 rev src ip 222.15.63.163 rev
dst ip 172.112.98.138 src/dst ports 54957/80 rev src/dst ports 80/54957
zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
==2nd panic==
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007faece4b642a in __GI_abort () at abort.c:89
#2 0x00007faece4f2c00 in __libc_message (do_abort=do_abort@entry=2,
fmt=fmt@entry=0x7faece5e7d98 "*** Error in `%s': %s: 0x%s ***\n")
at ../sysdeps/posix/libc_fatal.c:175
#3 0x00007faece4f8fc6 in malloc_printerr (action=3, str=0x7faece5e7e60 "double
free or corruption (out)", ptr=<optimized out>, ar_ptr=<optimized out>)
at malloc.c:5049
#4 0x00007faece4f980e in _int_free (av=0x7faece81bb00 <main_arena>,
p=0x7fa11c50ee30, have_lock=0) at malloc.c:3905
#5 0x0000562c63ac82ad in nat_clean (ctb=0x7faecff65cf8, conn=0x7fa11c50ee40,
ct=0x7faecff61d98) at lib/conntrack.c:816
#6 conn_clean (ct=ct@entry=0x7faecff61d98, conn=0x7fa11c50ee40,
ctb=ctb@entry=0x7faecff65cf8) at lib/conntrack.c:842
#7 0x0000562c63ac8639 in sweep_bucket (limit=3906, now=1287899232,
ctb=<optimized out>, ct=0x7faecff61d98) at lib/conntrack.c:1421
#8 conntrack_clean (now=1287899232, ct=0x7faecff61d98) at lib/conntrack.c:1457
#9 clean_thread_main (f_=0x7faecff61d98) at lib/conntrack.c:1512
#10 0x0000562c63a3f48f in ovsthread_wrapper (aux_=<optimized out>) at
lib/ovs-thread.c:354
#11 0x00007faecef76494 in start_thread (arg=0x7faec77fe700) at
pthread_create.c:333
#12 0x00007faece56aacf in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:97
2019-03-08T01:15:16.929Z|00055|conntrack(ct_clean3)|WARN|conn_clean: conn not
present in hmap: src ip 2.92.142.188 dst ip 222.15.63.163 rev src ip
222.15.63.163 rev dst ip 172.116.154.125 src/dst ports 23446/80 rev src/dst
ports 80/23446 zone/rev zone 0/0 nw_proto/rev nw_proto 6/6
Darrell Ball wrote:
> Thanks for your help Solomon
>
> Can you give the following debug patch a spin ?
> I will continue to try to repro locally.
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 5410ab4..6968c03 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
> conn_key *key2)
> return 1;
> }
>
> +static bool
> +conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
> + const struct conn_key *key)
> + OVS_REQUIRES(ctb->lock)
> +{
> + struct conn *conn;
> + uint32_t hash = conn_key_hash(key, ct->hash_basis);
> +
> + HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
> + if (!conn_key_cmp(&conn->key, key)) {
> + return true;
> + }
> + }
> + return false;
> +}
> +
> static void
> ct_print_conn_info(const struct conn *c, const char *log_msg,
> enum vlog_level vll, bool force, bool rl_on)
> @@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn *conn,
> expectation_clean(ct, &conn->key, ct->hash_basis);
> }
> ovs_list_remove(&conn->exp_node);
> - hmap_remove(&ctb->connections, &conn->node);
> +
> + if (conn_key_present(ct, ctb, &conn->key)) {
> + hmap_remove(&ctb->connections, &conn->node);
> + } else {
> + char *log_msg = xasprintf("conn_clean: conn not present in hmap");
> + ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
> + free(log_msg);
> + }
> atomic_count_dec(&ct->n_conn);
> if (conn->nat_info) {
> nat_clean(ct, conn, ctb);
> @@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
> conntrack_bucket *ctb,
>
> for (unsigned i = 0; i < N_CT_TM; i++) {
> LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
> - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
> - if (!conn_expired(conn, now) || count >= limit) {
> - min_expiration = MIN(min_expiration, conn->expiration);
> - if (count >= limit) {
> - /* Do not check other lists. */
> - COVERAGE_INC(conntrack_long_cleanup);
> - return min_expiration;
> - }
> - break;
> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> + if (!conn_expired(conn, now) || count >= limit) {
> + min_expiration = MIN(min_expiration, conn->expiration);
> + if (count >= limit) {
> + /* Do not check other lists. */
> + COVERAGE_INC(conntrack_long_cleanup);
> + return min_expiration;
> }
> - conn_clean(ct, conn, ctb);
> - count++;
> + break;
> }
> + conn_clean(ct, conn, ctb);
> + count++;
> }
> }
> return min_expiration;
> @@ -2540,16 +2562,18 @@ int
> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> {
> for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> - struct conn *conn, *next;
> -
> - ct_lock_lock(&ct->buckets[i].lock);
> - HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections)
> {
> - if ((!zone || *zone == conn->key.zone) &&
> - (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> - conn_clean(ct, conn, &ct->buckets[i]);
> + struct conntrack_bucket *ctb = &ct->buckets[i];
> + ct_lock_lock(&ctb->lock);
> + for (unsigned j = 0; j < N_CT_TM; j++) {
> + struct conn *conn, *next;
> + LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
> + if (!zone || *zone == conn->key.zone) {
> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> + conn_clean(ct, conn, ctb);
> + }
> }
> }
> - ct_lock_unlock(&ct->buckets[i].lock);
> + ct_lock_unlock(&ctb->lock);
> }
>
> return 0;
> (END)
>
>
> On Wed, Mar 6, 2019 at 11:33 PM solomon <[email protected]> wrote:
>
>> Darrell Ball wrote:
>>> + LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> &ctb->exp_lists[j]) {
>>> + if (!zone || *zone == conn->key.zone) {
>>> + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>
>> Why do we need this assert?
>> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
>> CT_CONN_TYPE_UN_NAT in nat_clean(), like the following:
>> + if ((!zone || *zone == conn->key.zone) &&
>> + (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
>> + //ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>
>>
>> With the above code, I caught another panic, this time in the ct_clean thread.
>> I have seen the same panic without changing the code.
>> Both ct_clean and flush-conntrack can hit the bad bucket value.
>>
>> #0 0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>> 287 while (*bucket != node) {
>> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
>> (gdb) bt
>> #0 0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
>> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
>> #1 conn_clean (ct=ct@entry=0x7f9498700d98, conn=0x7f871bdc41b0,
>> ctb=ctb@entry=0x7f9498701c38) at lib/conntrack.c:815
>> #2 0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
>> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
>> #3 conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
>> lib/conntrack.c:1433
>> #4 clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
>> #5 0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
>> lib/ovs-thread.c:354
>> #6 0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
>> pthread_create.c:333
>> #7 0x00007f9496d09acf in clone () at
>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
>> (gdb) p bucket
>> $1 = (struct hmap_node **) 0x8 <====== why is bucket not a valid pointer value?
>> (gdb) p *(struct hmap *) 0x7f9498701c68
>> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
>> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
>> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
>> (gdb) p 203059667&32767
>> $4 = 29139
>> (gdb) p &hmap->buckets[29139]
>> $5 = (struct hmap_node **) 0x7f8609fc8a98
>>
>>
>>> + conn_clean(ct, conn, ctb);
>>> + }
>>> }
>>> }
>>> - ct_lock_unlock(&ct->buckets[i].lock);
>>> + ct_lock_unlock(&ctb->lock);
>>> }
>>>
>>> return 0;
>>>
>>>
>>> which yields conntrack_flush(...) as
>>>
>>> int
>>> conntrack_flush(struct conntrack *ct, const uint16_t *zone)
>>> {
>>> for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
>>> struct conntrack_bucket *ctb = &ct->buckets[i];
>>> ct_lock_lock(&ctb->lock);
>>> for (unsigned j = 0; j < N_CT_TM; j++) {
>>> struct conn *conn, *next;
>>> LIST_FOR_EACH_SAFE (conn, next, exp_node,
>> &ctb->exp_lists[j]) {
>>> if (!zone || *zone == conn->key.zone) {
>>> ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>> conn_clean(ct, conn, ctb);
>>> }
>>> }
>>> }
>>> ct_lock_unlock(&ctb->lock);
>>> }
>>>
>>> return 0;
>>> }
>>>
>>> Thanks Darrell
>>>
>>>
>>>
>>> On Wed, Mar 6, 2019 at 8:06 PM solomon <[email protected]> wrote:
>>>
>>>>
>>>> When I test conntrack, I catch a panic in OVS.
>>>>
>>>> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
>>>> -vconsole:emer -vsyslog:err -vfi'.
>>>> Program terminated with signal SIGSEGV, Segmentation fault.
>>>> #0 0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>> 287 while (*bucket != node) {
>>>> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
>>>> (gdb) bt
>>>> #0 0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
>>>> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
>>>> #1 conn_clean (ct=ct@entry=0x7f8178c75d98, conn=0x7f734cde0170,
>>>> ctb=ctb@entry=0x7f8178c7fd40) at lib/conntrack.c:815
>>>> #2 0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98, zone=0x0)
>> at
>>>> lib/conntrack.c:2549
>>>> #3 0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
>>>> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
>>>> #4 0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc@entry=1,
>>>> argv=argv@entry=0x5605c697ec30, dpctl_p=dpctl_p@entry=0x7fffee718110)
>> at
>>>> lib/dpctl.c:1388
>>>> #5 0x00005605c5cdeb78 in dpctl_unixctl_handler (conn=0x5605c6959ca0,
>>>> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
>> <dpctl_flush_conntrack>) at
>>>> lib/dpctl.c:2312
>>>> #6 0x00005605c5c806ea in process_command (request=<optimized out>,
>>>> conn=0x5605c6959ca0) at lib/unixctl.c:308
>>>> #7 run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
>>>> #8 unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
>>>> #9 0x00005605c5804217 in main (argc=<optimized out>, argv=<optimized
>>>> out>) at vswitchd/ovs-vswitchd.c:126
>>>>
>>>>
>>>> Environment:
>>>> ovs-2.10.1
>>>> dpdk-18.0.2.2
>>>>
>>>> How-To-Repeat:
>>>> 1. Configure OVS with an SNAT action.
>>>>
>>>> ovs-ofctl -O OpenFlow15 add-group $br_name "group_id=1, type=select,
>>>> selection_method=hash
>>>>
>> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
>>>> "
>>>>
>>>> 2. syn-ddos sends TCP SYN packets to generate connection-tracking entries.
>>>> 3.
>>>> # ovs-appctl dpctl/ct-get-nconns
>>>> 2063993
>>>> # ovs-appctl dpctl/flush-conntrack
>>>>
>>>> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
>>>> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
>>>> ovs-appctl: ovs-vswitchd: transaction error (End of file)
>>>>
>>>>
>>>> --
>>>> Thanks
>>>> Solomon
>>>>
>>>
>>
>> --
>>
>> Thanks
>> Solomon
>>
>
--
Thanks
Li Wei
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev