Thanks for your help Solomon
Can you give the following debug patch a spin ?
I will continue to try to repo locally.
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 5410ab4..6968c03 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -250,6 +250,22 @@ conn_key_cmp(const struct conn_key *key1, const struct
conn_key *key2)
return 1;
}
+static bool
+conn_key_present(struct conntrack *ct, struct conntrack_bucket *ctb,
+ const struct conn_key *key)
+ OVS_REQUIRES(ctb->lock)
+{
+ struct conn *conn;
+ uint32_t hash = conn_key_hash(key, ct->hash_basis);
+
+ HMAP_FOR_EACH_WITH_HASH (conn, node, hash, &ctb->connections) {
+ if (!conn_key_cmp(&conn->key, key)) {
+ return true;
+ }
+ }
+ return false;
+}
+
static void
ct_print_conn_info(const struct conn *c, const char *log_msg,
enum vlog_level vll, bool force, bool rl_on)
@@ -812,7 +828,14 @@ conn_clean(struct conntrack *ct, struct conn *conn,
expectation_clean(ct, &conn->key, ct->hash_basis);
}
ovs_list_remove(&conn->exp_node);
- hmap_remove(&ctb->connections, &conn->node);
+
+ if (conn_key_present(ct, ctb, &conn->key)) {
+ hmap_remove(&ctb->connections, &conn->node);
+ } else {
+ char *log_msg = xasprintf("conn_clean: conn not present in hmap");
+ ct_print_conn_info(conn, log_msg, VLL_WARN, true, false);
+ free(log_msg);
+ }
atomic_count_dec(&ct->n_conn);
if (conn->nat_info) {
nat_clean(ct, conn, ctb);
@@ -1383,19 +1406,18 @@ sweep_bucket(struct conntrack *ct, struct
conntrack_bucket *ctb,
for (unsigned i = 0; i < N_CT_TM; i++) {
LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[i]) {
- if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
- if (!conn_expired(conn, now) || count >= limit) {
- min_expiration = MIN(min_expiration, conn->expiration);
- if (count >= limit) {
- /* Do not check other lists. */
- COVERAGE_INC(conntrack_long_cleanup);
- return min_expiration;
- }
- break;
+ ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+ if (!conn_expired(conn, now) || count >= limit) {
+ min_expiration = MIN(min_expiration, conn->expiration);
+ if (count >= limit) {
+ /* Do not check other lists. */
+ COVERAGE_INC(conntrack_long_cleanup);
+ return min_expiration;
}
- conn_clean(ct, conn, ctb);
- count++;
+ break;
}
+ conn_clean(ct, conn, ctb);
+ count++;
}
}
return min_expiration;
@@ -2540,16 +2562,18 @@ int
conntrack_flush(struct conntrack *ct, const uint16_t *zone)
{
for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
- struct conn *conn, *next;
-
- ct_lock_lock(&ct->buckets[i].lock);
- HMAP_FOR_EACH_SAFE (conn, next, node, &ct->buckets[i].connections)
{
- if ((!zone || *zone == conn->key.zone) &&
- (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
- conn_clean(ct, conn, &ct->buckets[i]);
+ struct conntrack_bucket *ctb = &ct->buckets[i];
+ ct_lock_lock(&ctb->lock);
+ for (unsigned j = 0; j < N_CT_TM; j++) {
+ struct conn *conn, *next;
+ LIST_FOR_EACH_SAFE (conn, next, exp_node, &ctb->exp_lists[j]) {
+ if (!zone || *zone == conn->key.zone) {
+ ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
+ conn_clean(ct, conn, ctb);
+ }
}
}
- ct_lock_unlock(&ct->buckets[i].lock);
+ ct_lock_unlock(&ctb->lock);
}
return 0;
(END)
On Wed, Mar 6, 2019 at 11:33 PM solomon <[email protected]> wrote:
> Darrell Ball wrote:
> > + LIST_FOR_EACH_SAFE (conn, next, exp_node,
> &ctb->exp_lists[j]) {
> > + if (!zone || *zone == conn->key.zone) {
> > + ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>
> why do we need this assert?
> Clean the CT_CONN_TYPE_DEFAULT type in conntrack_flush(), and clean
> CT_CONN_TYPE_UN_NAT in nat_clean() like following:
> + if ((!zone || *zone == conn->key.zone) &&
> + (conn->conn_type == CT_CONN_TYPE_DEFAULT)) {
> + //ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>
>
> with the above code, catch an another panic which in ct_clean thread.
> I have see the same panic without changeing the code.
> Both ct_clean and flush-conntrack, can catch the bad bucket value.
>
> #0 0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
> 287 while (*bucket != node) {
> [Current thread is 1 (Thread 0x7f948ffff700 (LWP 2085796))]
> (gdb) bt
> #0 0x0000562ae7402553 in hmap_remove (node=0x7f871bdc4258,
> hmap=0x7f9498701c68) at ./include/openvswitch/hmap.h:287
> #1 conn_clean (ct=ct@entry=0x7f9498700d98, conn=0x7f871bdc41b0,
> ctb=ctb@entry=0x7f9498701c38) at lib/conntrack.c:815
> #2 0x0000562ae7402a28 in sweep_bucket (limit=3906, now=1223168469,
> ctb=<optimized out>, ct=0x7f9498700d98) at lib/conntrack.c:1396
> #3 conntrack_clean (now=1223168469, ct=0x7f9498700d98) at
> lib/conntrack.c:1433
> #4 clean_thread_main (f_=0x7f9498700d98) at lib/conntrack.c:1488
> #5 0x0000562ae737a48f in ovsthread_wrapper (aux_=<optimized out>) at
> lib/ovs-thread.c:354
> #6 0x00007f9497715494 in start_thread (arg=0x7f948ffff700) at
> pthread_create.c:333
> #7 0x00007f9496d09acf in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
> (gdb) p bucket
> $1 = (struct hmap_node **) 0x8 <====== why is bucket not a point value?
> (gdb) p *(struct hmap *) 0x7f9498701c68
> $2 = {buckets = 0x7f8609f8fc00, one = 0x0, mask = 32767, n = 31040}
> (gdb) p *(struct hmap_node *) 0x7f871bdc4258
> $3 = {hash = 203059667, next = 0x7f8707cfe6c8}
> (gdb) p 203059667&32767
> $4 = 29139
> (gdb) p &hmap->buckets[29139]
> $5 = (struct hmap_node **) 0x7f8609fc8a98
>
>
> > + conn_clean(ct, conn, ctb);
> > + }
> > }
> > }
> > - ct_lock_unlock(&ct->buckets[i].lock);
> > + ct_lock_unlock(&ctb->lock);
> > }
> >
> > return 0;
> >
> >
> > which yields conntrack_flush(...) as
> >
> > int
> > conntrack_flush(struct conntrack *ct, const uint16_t *zone)
> > {
> > for (unsigned i = 0; i < CONNTRACK_BUCKETS; i++) {
> > struct conntrack_bucket *ctb = &ct->buckets[i];
> > ct_lock_lock(&ctb->lock);
> > for (unsigned j = 0; j < N_CT_TM; j++) {
> > struct conn *conn, *next;
> > LIST_FOR_EACH_SAFE (conn, next, exp_node,
> &ctb->exp_lists[j]) {
> > if (!zone || *zone == conn->key.zone) {
> > ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
> > conn_clean(ct, conn, ctb);
> > }
> > }
> > }
> > ct_lock_unlock(&ctb->lock);
> > }
> >
> > return 0;
> > }
> >
> > Thanks Darrell
> >
> >
> >
> > On Wed, Mar 6, 2019 at 8:06 PM solomon <[email protected]> wrote:
> >
> >>
> >> When i test conntrack, i catch a panic of ovs.
> >>
> >> Core was generated by `ovs-vswitchd unix:/var/run/openvswitch/db.sock
> >> -vconsole:emer -vsyslog:err -vfi'.
> >> Program terminated with signal SIGSEGV, Segmentation fault.
> >> #0 0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
> >> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
> >> 287 while (*bucket != node) {
> >> [Current thread is 1 (Thread 0x7f8178dccb00 (LWP 2024338))]
> >> (gdb) bt
> >> #0 0x00005605c5cd7553 in hmap_remove (node=0x7f734cde0218,
> >> hmap=0x7f8178c7fd70) at ./include/openvswitch/hmap.h:287
> >> #1 conn_clean (ct=ct@entry=0x7f8178c75d98, conn=0x7f734cde0170,
> >> ctb=ctb@entry=0x7f8178c7fd40) at lib/conntrack.c:815
> >> #2 0x00005605c5cdd66a in conntrack_flush (ct=0x7f8178c75d98, zone=0x0)
> at
> >> lib/conntrack.c:2549
> >> #3 0x00005605c5bc2b39 in ct_dpif_flush (dpif=0x5605c68a6430,
> >> zone=zone@entry=0x0, tuple=tuple@entry=0x0) at lib/ct-dpif.c:140
> >> #4 0x00005605c5ce17a0 in dpctl_flush_conntrack (argc=argc@entry=1,
> >> argv=argv@entry=0x5605c697ec30, dpctl_p=dpctl_p@entry=0x7fffee718110)
> at
> >> lib/dpctl.c:1388
> >> #5 0x00005605c5cdeb78 in dpctl_unixctl_handler (conn=0x5605c6959ca0,
> >> argc=1, argv=0x5605c697ec30, aux=0x5605c5ce1610
> <dpctl_flush_conntrack>) at
> >> lib/dpctl.c:2312
> >> #6 0x00005605c5c806ea in process_command (request=<optimized out>,
> >> conn=0x5605c6959ca0) at lib/unixctl.c:308
> >> #7 run_connection (conn=0x5605c6959ca0) at lib/unixctl.c:342
> >> #8 unixctl_server_run (server=0x5605c6868230) at lib/unixctl.c:393
> >> #9 0x00005605c5804217 in main (argc=<optimized out>, argv=<optimized
> >> out>) at vswitchd/ovs-vswitchd.c:126
> >>
> >>
> >> Environment:
> >> ovs-2.10.1
> >> dpdk-18.0.2.2
> >>
> >> How-To-Repeat:
> >> 1. configure ovs with snat aciton.
> >>
> >> ovs-ofctl -O OpenFlow15 add-group $br_name "group_id=1, type=select,
> >> selection_method=hash
> >>
> bucket=bucket_id=1,weight:100,actions=ct(nat(src=172.16.1.1-172.255.255.255),commit,table=40)
> >> "
> >>
> >> 2. syn-ddos send tcp syn packet to generate connection tracks.
> >> 3.
> >> # ovs-appctl dpctl/ct-get-nconns
> >> 2063993
> >> # ovs-appctl dpctl/flush-conntrack
> >>
> >> 2019-03-07T03:52:24Z|00001|unixctl|WARN|error communicating with
> >> unix:/var/run/openvswitch/ovs-vswitchd.2024338.ctl: End of file
> >> ovs-appctl: ovs-vswitchd: transaction error (End of file)
> >>
> >>
> >> --
> >> Thanks
> >> Solomon
> >>
> >
>
> --
>
> Thanks
> Solomon
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev