Again, thanks for reporting this bug!

I just posted a more complete patch to fix this issue to the ova-dev list. 
Please have a look.

  Jarno

> On Feb 7, 2017, at 4:36 AM, wangyunjian <wangyunj...@huawei.com> wrote:
> 
> I have tested patch without issue. Will you submit it as an official patch?
> 
> Thanks,
> 
> Yunjian.
> 
>>> On Feb 5, 2017, at 10:49 PM, wangyunjian <wangyunj...@huawei.com> wrote:
>>> 
>>> My ovs version is 
>>> openvswitch-2.5.0(http://openvswitch.org/releases/openvswitch-2.5.0.tar.gz).
>>>  
>>> I had modified the code as follows and getted other crash. Do it need a 
>>> lock to protect the operations
>>> of mbridge->mbundles hmap(xbridge->xport hmap) between ovs-vswichd thread 
>>> and the upcall handler(revalidator) thread?
>>> 
>>> 413 static struct mbundle *
>>> 414 mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
>>> 415 {
>>> 416     struct mbundle *mbundle;
>>> 417 
>>> 418     HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 
>>> 0),
>>> 419                              &mbridge->mbundles) {
>>> 420         xsleep(2);                                                      
>>>                                                    //only add xsleep(2)
>> 
>> xsleep() causes the thread to quiesce, basically telling the main thread it 
>> is OK to delete the bridge, so the new crash you see is not a bug, but 
>> caused by your change.
>> 
>> Instead, try it out with these changes:
>> 
>> diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c
>> index 6f8079a..15a398f 100644
>> --- a/ofproto/ofproto-dpif-mirror.c
>> +++ b/ofproto/ofproto-dpif-mirror.c
>> @@ -18,7 +18,7 @@
>> 
>> #include <errno.h>
>> 
>> -#include "hmap.h"
>> +#include "cmap.h"
>> #include "hmapx.h"
>> #include "ofproto.h"
>> #include "vlan-bitmap.h"
>> @@ -31,7 +31,7 @@ BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= 
>> MAX_MIRRORS);
>> 
>> struct mbridge {
>>     struct mirror *mirrors[MAX_MIRRORS];
>> -    struct hmap mbundles;
>> +    struct cmap mbundles;
>> 
>>     bool need_revalidate;
>>     bool has_mirrors;
>> @@ -40,7 +40,7 @@ struct mbridge {
>> };
>> 
>> struct mbundle {
>> -    struct hmap_node hmap_node; /* In parent 'mbridge' map. */
>> +    struct cmap_node cmap_node; /* In parent 'mbridge' map. */
>>     struct ofbundle *ofbundle;
>> 
>>     mirror_mask_t src_mirrors;  /* Mirrors triggered when packet received. */
>> @@ -84,7 +84,7 @@ mbridge_create(void)
>>     mbridge = xzalloc(sizeof *mbridge);
>>     ovs_refcount_init(&mbridge->ref_cnt);
>> 
>> -    hmap_init(&mbridge->mbundles);
>> +    cmap_init(&mbridge->mbundles);
>>     return mbridge;
>> }
>> 
>> @@ -101,7 +101,7 @@ mbridge_ref(const struct mbridge *mbridge_)
>> void
>> mbridge_unref(struct mbridge *mbridge)
>> {
>> -    struct mbundle *mbundle, *next;
>> +    struct mbundle *mbundle;
>>     size_t i;
>> 
>>     if (!mbridge) {
>> @@ -115,11 +115,11 @@ mbridge_unref(struct mbridge *mbridge)
>>             }
>>         }
>> 
>> -        HMAP_FOR_EACH_SAFE (mbundle, next, hmap_node, &mbridge->mbundles) {
>> +        CMAP_FOR_EACH (mbundle, cmap_node, &mbridge->mbundles) {
>>             mbridge_unregister_bundle(mbridge, mbundle->ofbundle);
>>         }
>> 
>> -        hmap_destroy(&mbridge->mbundles);
>> +        cmap_destroy(&mbridge->mbundles);
>>         free(mbridge);
>>     }
>> }
>> @@ -147,7 +147,7 @@ mbridge_register_bundle(struct mbridge *mbridge, struct 
>> ofbundle *ofbundle)
>> 
>>     mbundle = xzalloc(sizeof *mbundle);
>>     mbundle->ofbundle = ofbundle;
>> -    hmap_insert(&mbridge->mbundles, &mbundle->hmap_node,
>> +    cmap_insert(&mbridge->mbundles, &mbundle->cmap_node,
>>                 hash_pointer(ofbundle, 0));
>> }
>> 
>> @@ -173,8 +173,9 @@ mbridge_unregister_bundle(struct mbridge *mbridge, 
>> struct ofbundle *ofbundle)
>>         }
>>     }
>> 
>> -    hmap_remove(&mbridge->mbundles, &mbundle->hmap_node);
>> -    free(mbundle);
>> +    cmap_remove(&mbridge->mbundles, &mbundle->cmap_node,
>> +                hash_pointer(ofbundle, 0));
>> +    ovsrcu_postpone(free, mbundle);
>> }
>> 
>> mirror_mask_t
>> @@ -269,7 +270,7 @@ mirror_set(struct mbridge *mbridge, void *aux, const 
>> char *name,
>> 
>>     /* Update mbundles. */
>>     mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
>> -    HMAP_FOR_EACH (mbundle, hmap_node, &mirror->mbridge->mbundles) {
>> +    CMAP_FOR_EACH (mbundle, cmap_node, &mirror->mbridge->mbundles) {
>>         if (hmapx_contains(&mirror->srcs, mbundle)) {
>>             mbundle->src_mirrors |= mirror_bit;
>>         } else {
>> @@ -308,7 +309,7 @@ mirror_destroy(struct mbridge *mbridge, void *aux)
>>     }
>> 
>>     mirror_bit = MIRROR_MASK_C(1) << mirror->idx;
>> -    HMAP_FOR_EACH (mbundle, hmap_node, &mbridge->mbundles) {
>> +    CMAP_FOR_EACH (mbundle, cmap_node, &mbridge->mbundles) {
>>         mbundle->src_mirrors &= ~mirror_bit;
>>         mbundle->dst_mirrors &= ~mirror_bit;
>>         mbundle->mirror_out &= ~mirror_bit;
>> @@ -414,9 +415,9 @@ static struct mbundle *
>> mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
>> {
>>     struct mbundle *mbundle;
>> +    uint32_t hash = hash_pointer(ofbundle, 0);
>> 
>> -    HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0),
>> -                             &mbridge->mbundles) {
>> +    CMAP_FOR_EACH_WITH_HASH (mbundle, cmap_node, hash, &mbridge->mbundles) {
>>         if (mbundle->ofbundle == ofbundle) {
>>             return mbundle;
>>         }
>> @@ -424,7 +425,7 @@ mbundle_lookup(const struct mbridge *mbridge, struct 
>> ofbundle *ofbundle)
>>     return NULL;
>> }
>> 
>> -/* Looks up each of the 'n_ofbundlees' pointers in 'ofbundlees' as mbundles 
>> and
>> +/* Looks up each of the 'n_ofbundles' pointers in 'ofbundles' as mbundles 
>> and
>>  * adds the ones that are found to 'mbundles'. */
>> static void
>> mbundle_lookup_multiple(const struct mbridge *mbridge,
>> 
>> 
>>  Jarno
>> 
>> 
>>> 421         if (mbundle->ofbundle == ofbundle) {
>>> 422             return mbundle;
>>> 423         }
>>> 424     }
>>> 425     return NULL;
>>> 426 }
>>> 427 
>>> 
>>> Call Trace:
>>> Program received signal SIGSEGV, Segmentation fault.
>>> [Switching to Thread 0x7fadfefd1700 (LWP 25029)]
>>> mbundle_lookup (mbridge=<optimized out>, ofbundle=0x7fae03a55210) at 
>>> ofproto/ofproto-dpif-mirror.c:421
>>> 421         if (mbundle->ofbundle == ofbundle) {
>>> (gdb) bt
>>> #0  mbundle_lookup (mbridge=<optimized out>, ofbundle=0x7fae03a55210) at 
>>> ofproto/ofproto-dpif-mirror.c:421
>>> #1  0x00007fae029ec519 in mirror_bundle_out (mbridge=<optimized out>, 
>>> ofbundle=<optimized out>) at ofproto/ofproto-dpif-mirror.c:183
>>> #2  0x00007fae029fba61 in xbundle_mirror_out (xbridge=<optimized out>, 
>>> xbundle=0x7fae03a4e910) at ofproto/ofproto-dpif-xlate.c:1524
>>> #3  xlate_normal (ctx=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:2360
>>> #4  xlate_output_action (ctx=ctx@entry=0x7fadfef94420, port=<optimized 
>>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at 
>>> ofproto/ofproto-dpif-xlate.c:3829
>>> #5  0x00007fae029f854f in do_xlate_actions 
>>> (ofpacts=ofpacts@entry=0x7fade800e5a8, ofpacts_len=ofpacts_len@entry=8, 
>>> ctx=ctx@entry=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:4384
>>> #6  0x00007fae029fd6ec in xlate_actions (xin=xin@entry=0x7fadfef955c0, 
>>> xout=xout@entry=0x7fadfefb3178) at ofproto/ofproto-dpif-xlate.c:5272
>>> #7  0x00007fae029f16f6 in upcall_xlate (wc=0x7fadfefb31d0, 
>>> odp_actions=0x7fadfefb3190, upcall=0x7fadfefb3120, udpif=0x7fae03a81040) at 
>>> ofproto/ofproto-dpif-upcall.c:1068
>>> #8  process_upcall (udpif=udpif@entry=0x7fae03a81040, 
>>> upcall=upcall@entry=0x7fadfefb3120, 
>>> odp_actions=odp_actions@entry=0x7fadfefb3190, wc=wc@entry=0x7fadfefb31d0) 
>>> at ofproto/ofproto-dpif-upcall.c:1206
>>> #9  0x00007fae029f3960 in recv_upcalls (handler=0x7fae03a4f5c8, 
>>> handler=0x7fae03a4f5c8) at ofproto/ofproto-dpif-upcall.c:778
>>> #10 0x00007fae029f3e1c in udpif_upcall_handler (arg=0x7fae03a4f5c8) at 
>>> ofproto/ofproto-dpif-upcall.c:696
>>> #11 0x00007fae02a7e596 in ovsthread_wrapper (aux_=<optimized out>) at 
>>> lib/ovs-thread.c:340
>>> #12 0x00007fae01d14dc5 in start_thread () from /usr/lib64/libpthread.so.0
>>> #13 0x00007fae0132271d in clone () from /usr/lib64/libc.so.6
>>> (gdb) p mbundle
>>> $1 = (struct mbundle *) 0xcccccccccccccccc
>>> 
>>> Program received signal SIGSEGV, Segmentation fault.
>>> [Switching to Thread 0x7fadfefd1700 (LWP 3570)]
>>> 0x00007fae029f68a0 in get_ofp_port (xbridge=xbridge@entry=0x7fae03a81680, 
>>> ofp_port=ofp_port@entry=1) at ofproto/ofproto-dpif-xlate.c:1400
>>> 1400                if (xport->ofp_port == ofp_port) {
>>> (gdb) bt
>>> #0  0x00007fae029f68a0 in get_ofp_port 
>>> (xbridge=xbridge@entry=0x7fae03a81680, ofp_port=ofp_port@entry=1) at 
>>> ofproto/ofproto-dpif-xlate.c:1400
>>> #1  0x00007fae029f692f in lookup_input_bundle (xbridge=0x7fae03a81680, 
>>> in_port=1, warn=<optimized out>, in_xportp=in_xportp@entry=0x7fadfef93e90) 
>>> at ofproto/ofproto-dpif-xlate.c:1550
>>> #2  0x00007fae029fba23 in xlate_normal (ctx=0x7fadfef94420) at 
>>> ofproto/ofproto-dpif-xlate.c:2339
>>> #3  xlate_output_action (ctx=ctx@entry=0x7fadfef94420, port=<optimized 
>>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at 
>>> ofproto/ofproto-dpif-xlate.c:3829
>>> #4  0x00007fae029f854f in do_xlate_actions 
>>> (ofpacts=ofpacts@entry=0x7fae03a7f0a8, ofpacts_len=ofpacts_len@entry=8, 
>>> ctx=ctx@entry=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:4384
>>> #5  0x00007fae029fd6ec in xlate_actions (xin=xin@entry=0x7fadfef955c0, 
>>> xout=xout@entry=0x7fadfefb3178) at ofproto/ofproto-dpif-xlate.c:5272
>>> #6  0x00007fae029f16f6 in upcall_xlate (wc=0x7fadfefb31d0, 
>>> odp_actions=0x7fadfefb3190, upcall=0x7fadfefb3120, udpif=0x7fae03a4fb60) at 
>>> ofproto/ofproto-dpif-upcall.c:1068
>>> #7  process_upcall (udpif=udpif@entry=0x7fae03a4fb60, 
>>> upcall=upcall@entry=0x7fadfefb3120, 
>>> odp_actions=odp_actions@entry=0x7fadfefb3190, wc=wc@entry=0x7fadfefb31d0) 
>>> at ofproto/ofproto-dpif-upcall.c:1206
>>> #8  0x00007fae029f3960 in recv_upcalls (handler=0x7fae03a7e7b8, 
>>> handler=0x7fae03a7e7b8) at ofproto/ofproto-dpif-upcall.c:778
>>> #9  0x00007fae029f3e1c in udpif_upcall_handler (arg=0x7fae03a7e7b8) at 
>>> ofproto/ofproto-dpif-upcall.c:696
>>> #10 0x00007fae02a7e596 in ovsthread_wrapper (aux_=<optimized out>) at 
>>> lib/ovs-thread.c:340
>>> #11 0x00007fae01d14dc5 in start_thread () from /usr/lib64/libpthread.so.0
>>> #12 0x00007fae0132271d in clone () from /usr/lib64/libc.so.6
>>> (gdb) p xport
>>> $1 = (struct xport *) 0x19
>>> 
>>>> From: nickcooper-zhangtonghao [mailto:n...@opencloud.tech] 
>>>> Sent: Saturday, February 04, 2017 11:48 PM
>>>> To: wangyunjian <wangyunj...@huawei.com>
>>>> Cc: b...@ovn.org; d...@openvswitch.org; caihe <ca...@huawei.com>; 
>>>> Huangjian (J) <huangjian.huangj...@huawei.com>
>>>> Subject: Re: [ovs-dev] [BUG] upcall handler thread crash
>>>> 
>>>> Hi, what’s the OvS version you tested. I didn’t get the crash with master 
>>>> version.
>>>> The test script is described as below.
>>>> 
>>>> ovs-vsctl add-br br0
>>>> ovs-vsctl add-port br0 eth1
>>>> for i in `seq 0 1000`;
>>>> do
>>>>   ovs-vsctl add-port br0 eth2
>>>>   ovs-vsctl del-port br0 eth2
>>>> done
>>>> 
>>>> 
>>>> Thanks.
>>>> Nick
>>>> 
>>>> On Feb 4, 2017, at 7:21 PM, wangyunjian <wangyunj...@huawei.com> wrote:
>>>> 
>>>> Recently, write a script add and delete port repeatly, ovs upcall handler 
>>>> thread crash with the following trace.
>>>> In the code bellow, weather the operations of mbridge->mbundles hmap 
>>>> should with a lock to protect content between ovs-vswichd thread and the 
>>>> upcall handler thread:
>>>> 
>>>> static struct mbundle *
>>>> mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle)
>>>> {
>>>>  struct mbundle *mbundle;
>>>> 
>>>>  HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0),
>>>>                           &mbridge->mbundles) {
>>>>      if (mbundle->ofbundle == ofbundle) {
>>>>          return mbundle;
>>>>      }
>>>>  }
>>>>  return NULL;
>>>> }
>>>> 
>>>> Call Trace:
>>>> #0  0x000000000044d838 in mbundle_lookup (mbridge=0x7fbf68000cc0, 
>>>> ofbundle=0x7fbf7007c3d0) at ofproto/ofproto_dpif_mirror.c:472
>>>> #1  0x000000000044da15 in mirror_bundle_out (mbridge=<optimized out>, 
>>>> ofbundle=<optimized out>) at ofproto/ofproto_dpif_mirror.c:192
>>>> #2  0x0000000000448658 in xbundle_mirror_out (xbridge=0x7fbf5c6468a0, 
>>>> xbundle=0x7fbf3d48a160) at ofproto/ofproto_dpif_xlate.c:1556
>>>> #3  xlate_normal_flood (ctx=ctx@entry=0x7fbf7729e3d0, 
>>>> in_xbundle=in_xbundle@entry=0x7fbf5c22f870, vlan=vlan@entry=100) at 
>>>> ofproto/ofproto_dpif_xlate.c:2525
>>>> #4  0x0000000000448e7e in xlate_normal (ctx=0x7fbf7729e3d0) at 
>>>> ofproto/ofproto_dpif_xlate.c:2724
>>>> #5 xlate_output_action (ctx=ctx@entry=0x7fbf7729e3d0, port=<optimized 
>>>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at 
>>>> ofproto/ofproto_dpif_xlate.c:4061
>>>> #6 0x0000000000445147 in do_xlate_actions (ofpacts=0x6eb7288, 
>>>> ofpacts_len=16, ctx=ctx@entry=0x7fbf7729e3d0) at 
>>>> ofproto/ofproto_dpif_xlate.c:4616
>>>> #7 0x0000000000446481 in xlate_recursively (rule=0x6eb7100, 
>>>> ctx=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:3445
>>>> #8 xlate_table_action (ctx=0x7fbf7729e3d0, in_port=<optimized out>, 
>>>> table_id=<optimized out>, may_packet_in=<optimized out>, 
>>>> honor_table_miss=<optimized out>) at ofproto/ofproto_dpif_xlate.c:3513
>>>> #9 0x00000000004475a2 in compose_output_action__ 
>>>> (ctx=ctx@entry=0x7fbf7729e3d0, ofp_port=<optimized out>, xr=<optimized 
>>>> out>, check_stp=check_stp@entry=true) at ofproto/ofproto_dpif_xlate.c:3206
>>>> #10 0x0000000000447bbf in compose_output_action (xr=<optimized out>, 
>>>> ofp_port=<optimized out>, ctx=0x7fbf7729e3d0) at 
>>>> ofproto/ofproto_dpif_xlate.c:3426
>>>> #11 output_normal (ctx=ctx@entry=0x7fbf7729e3d0, 
>>>> out_xbundle=out_xbundle@entry=0x7fbf5cdf4aa0, vlan=vlan@entry=0) at 
>>>> ofproto/ofproto_dpif_xlate.c:2073
>>>> #12 0x00000000004486ae in xlate_normal_flood 
>>>> (ctx=ctx@entry=0x7fbf7729e3d0, in_xbundle=in_xbundle@entry=0x7fbf5e1d44d0, 
>>>> vlan=vlan@entry=0) at ofproto/ofproto_dpif_xlate.c:2529
>>>> #13 0x0000000000448e7e in xlate_normal (ctx=0x7fbf7729e3d0) at 
>>>> ofproto/ofproto_dpif_xlate.c:2724
>>>> #14 xlate_output_action (ctx=ctx@entry=0x7fbf7729e3d0, port=<optimized 
>>>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at 
>>>> ofproto/ofproto_dpif_xlate.c:4061
>>>> #15 0x0000000000445147 in do_xlate_actions 
>>>> (ofpacts=ofpacts@entry=0x7fbf70005ae8, ofpacts_len=ofpacts_len@entry=8, 
>>>> ctx=ctx@entry=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:4616
>>>> #16 0x000000000044a739 in xlate_actions (xin=xin@entry=0x7fbf7729f570, 
>>>> xout=xout@entry=0x7fbf772c0b18) at ofproto/ofproto_dpif_xlate.c:5509
>>>> #17 0x000000000043e4b6 in upcall_xlate (wc=0x7fbf772c0b70, 
>>>> odp_actions=0x7fbf772c0b30, upcall=0x7fbf772c0ac0, udpif=0x6e164a0) at 
>>>> ofproto/ofproto_dpif_upcall.c:1082
>>>> #18 process_upcall (udpif=udpif@entry=0x6e164a0, 
>>>> upcall=upcall@entry=0x7fbf772c0ac0, 
>>>> odp_actions=odp_actions@entry=0x7fbf772c0b30, wc=wc@entry=0x7fbf772c0b70) 
>>>> at ofproto/ofproto_dpif_upcall.c:1220
>>>> #19 0x00000000004407d3 in recv_upcalls (handler=0x7fbf58944810, 
>>>> handler=0x7fbf58944810) at ofproto/ofproto_dpif_upcall.c:784
>>>> #20 0x0000000000440cca in udpif_upcall_handler (arg=0x7fbf58944810) at 
>>>> ofproto/ofproto_dpif_upcall.c:701
>>>> #21 0x00000000004c95e4 in ovsthread_wrapper (aux_=<optimized out>) at 
>>>> lib/ovs_thread.c:649
>>>> #22 0x00007fbf7aeaedc5 in start_thread () from /usr/lib64/libpthread.so.0
>>>> #23 0x00007fbf79a5e71d in clone () from /usr/lib64/libc.so.6
>>>> 
>>>> #0  0x000000000044d838 in mbundle_lookup (mbridge=0x7fbf68000cc0, 
>>>> ofbundle=0x7fbf7007c3d0)
>>>> 472                if (mbundle->ofbundle == ofbundle) {
>>>> (gdb) p mbundle
>>>> $1 = (struct mbundle *) 0x31
>>>> 
>>> _______________________________________________
>>> dev mailing list
>>> d...@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to