Again, thanks for reporting this bug! I just posted a more complete patch to fix this issue to the ova-dev list. Please have a look.
Jarno > On Feb 7, 2017, at 4:36 AM, wangyunjian <wangyunj...@huawei.com> wrote: > > I have tested patch without issue. Will you submit it as an official patch? > > Thanks, > > Yunjian. > >>> On Feb 5, 2017, at 10:49 PM, wangyunjian <wangyunj...@huawei.com> wrote: >>> >>> My ovs version is >>> openvswitch-2.5.0(http://openvswitch.org/releases/openvswitch-2.5.0.tar.gz). >>> >>> I had modified the code as follows and getted other crash. Do it need a >>> lock to protect the operations >>> of mbridge->mbundles hmap(xbridge->xport hmap) between ovs-vswichd thread >>> and the upcall handler(revalidator) thread? >>> >>> 413 static struct mbundle * >>> 414 mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle) >>> 415 { >>> 416 struct mbundle *mbundle; >>> 417 >>> 418 HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, >>> 0), >>> 419 &mbridge->mbundles) { >>> 420 xsleep(2); >>> //only add xsleep(2) >> >> xsleep() causes the thread to quiesce, basically telling the main thread it >> is OK to delete the bridge, so the new crash you see is not a bug, but >> caused by your change. >> >> Instead, try it out with these changes: >> >> diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c >> index 6f8079a..15a398f 100644 >> --- a/ofproto/ofproto-dpif-mirror.c >> +++ b/ofproto/ofproto-dpif-mirror.c >> @@ -18,7 +18,7 @@ >> >> #include <errno.h> >> >> -#include "hmap.h" >> +#include "cmap.h" >> #include "hmapx.h" >> #include "ofproto.h" >> #include "vlan-bitmap.h" >> @@ -31,7 +31,7 @@ BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= >> MAX_MIRRORS); >> >> struct mbridge { >> struct mirror *mirrors[MAX_MIRRORS]; >> - struct hmap mbundles; >> + struct cmap mbundles; >> >> bool need_revalidate; >> bool has_mirrors; >> @@ -40,7 +40,7 @@ struct mbridge { >> }; >> >> struct mbundle { >> - struct hmap_node hmap_node; /* In parent 'mbridge' map. */ >> + struct cmap_node cmap_node; /* In parent 'mbridge' map. */ >> struct ofbundle *ofbundle; >> >> mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ >> @@ -84,7 +84,7 @@ mbridge_create(void) >> mbridge = xzalloc(sizeof *mbridge); >> ovs_refcount_init(&mbridge->ref_cnt); >> >> - hmap_init(&mbridge->mbundles); >> + cmap_init(&mbridge->mbundles); >> return mbridge; >> } >> >> @@ -101,7 +101,7 @@ mbridge_ref(const struct mbridge *mbridge_) >> void >> mbridge_unref(struct mbridge *mbridge) >> { >> - struct mbundle *mbundle, *next; >> + struct mbundle *mbundle; >> size_t i; >> >> if (!mbridge) { >> @@ -115,11 +115,11 @@ mbridge_unref(struct mbridge *mbridge) >> } >> } >> >> - HMAP_FOR_EACH_SAFE (mbundle, next, hmap_node, &mbridge->mbundles) { >> + CMAP_FOR_EACH (mbundle, cmap_node, &mbridge->mbundles) { >> mbridge_unregister_bundle(mbridge, mbundle->ofbundle); >> } >> >> - hmap_destroy(&mbridge->mbundles); >> + cmap_destroy(&mbridge->mbundles); >> free(mbridge); >> } >> } >> @@ -147,7 +147,7 @@ mbridge_register_bundle(struct mbridge *mbridge, struct >> ofbundle *ofbundle) >> >> mbundle = xzalloc(sizeof *mbundle); >> mbundle->ofbundle = ofbundle; >> - hmap_insert(&mbridge->mbundles, &mbundle->hmap_node, >> + cmap_insert(&mbridge->mbundles, &mbundle->cmap_node, >> hash_pointer(ofbundle, 0)); >> } >> >> @@ -173,8 +173,9 @@ mbridge_unregister_bundle(struct mbridge *mbridge, >> struct ofbundle *ofbundle) >> } >> } >> >> - hmap_remove(&mbridge->mbundles, &mbundle->hmap_node); >> - free(mbundle); >> + cmap_remove(&mbridge->mbundles, &mbundle->cmap_node, >> + hash_pointer(ofbundle, 0)); >> + ovsrcu_postpone(free, mbundle); >> } >> >> mirror_mask_t >> @@ -269,7 +270,7 @@ mirror_set(struct mbridge *mbridge, void *aux, const >> char *name, >> >> /* Update mbundles. */ >> mirror_bit = MIRROR_MASK_C(1) << mirror->idx; >> - HMAP_FOR_EACH (mbundle, hmap_node, &mirror->mbridge->mbundles) { >> + CMAP_FOR_EACH (mbundle, cmap_node, &mirror->mbridge->mbundles) { >> if (hmapx_contains(&mirror->srcs, mbundle)) { >> mbundle->src_mirrors |= mirror_bit; >> } else { >> @@ -308,7 +309,7 @@ mirror_destroy(struct mbridge *mbridge, void *aux) >> } >> >> mirror_bit = MIRROR_MASK_C(1) << mirror->idx; >> - HMAP_FOR_EACH (mbundle, hmap_node, &mbridge->mbundles) { >> + CMAP_FOR_EACH (mbundle, cmap_node, &mbridge->mbundles) { >> mbundle->src_mirrors &= ~mirror_bit; >> mbundle->dst_mirrors &= ~mirror_bit; >> mbundle->mirror_out &= ~mirror_bit; >> @@ -414,9 +415,9 @@ static struct mbundle * >> mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle) >> { >> struct mbundle *mbundle; >> + uint32_t hash = hash_pointer(ofbundle, 0); >> >> - HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0), >> - &mbridge->mbundles) { >> + CMAP_FOR_EACH_WITH_HASH (mbundle, cmap_node, hash, &mbridge->mbundles) { >> if (mbundle->ofbundle == ofbundle) { >> return mbundle; >> } >> @@ -424,7 +425,7 @@ mbundle_lookup(const struct mbridge *mbridge, struct >> ofbundle *ofbundle) >> return NULL; >> } >> >> -/* Looks up each of the 'n_ofbundlees' pointers in 'ofbundlees' as mbundles >> and >> +/* Looks up each of the 'n_ofbundles' pointers in 'ofbundles' as mbundles >> and >> * adds the ones that are found to 'mbundles'. */ >> static void >> mbundle_lookup_multiple(const struct mbridge *mbridge, >> >> >> Jarno >> >> >>> 421 if (mbundle->ofbundle == ofbundle) { >>> 422 return mbundle; >>> 423 } >>> 424 } >>> 425 return NULL; >>> 426 } >>> 427 >>> >>> Call Trace: >>> Program received signal SIGSEGV, Segmentation fault. >>> [Switching to Thread 0x7fadfefd1700 (LWP 25029)] >>> mbundle_lookup (mbridge=<optimized out>, ofbundle=0x7fae03a55210) at >>> ofproto/ofproto-dpif-mirror.c:421 >>> 421 if (mbundle->ofbundle == ofbundle) { >>> (gdb) bt >>> #0 mbundle_lookup (mbridge=<optimized out>, ofbundle=0x7fae03a55210) at >>> ofproto/ofproto-dpif-mirror.c:421 >>> #1 0x00007fae029ec519 in mirror_bundle_out (mbridge=<optimized out>, >>> ofbundle=<optimized out>) at ofproto/ofproto-dpif-mirror.c:183 >>> #2 0x00007fae029fba61 in xbundle_mirror_out (xbridge=<optimized out>, >>> xbundle=0x7fae03a4e910) at ofproto/ofproto-dpif-xlate.c:1524 >>> #3 xlate_normal (ctx=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:2360 >>> #4 xlate_output_action (ctx=ctx@entry=0x7fadfef94420, port=<optimized >>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at >>> ofproto/ofproto-dpif-xlate.c:3829 >>> #5 0x00007fae029f854f in do_xlate_actions >>> (ofpacts=ofpacts@entry=0x7fade800e5a8, ofpacts_len=ofpacts_len@entry=8, >>> ctx=ctx@entry=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:4384 >>> #6 0x00007fae029fd6ec in xlate_actions (xin=xin@entry=0x7fadfef955c0, >>> xout=xout@entry=0x7fadfefb3178) at ofproto/ofproto-dpif-xlate.c:5272 >>> #7 0x00007fae029f16f6 in upcall_xlate (wc=0x7fadfefb31d0, >>> odp_actions=0x7fadfefb3190, upcall=0x7fadfefb3120, udpif=0x7fae03a81040) at >>> ofproto/ofproto-dpif-upcall.c:1068 >>> #8 process_upcall (udpif=udpif@entry=0x7fae03a81040, >>> upcall=upcall@entry=0x7fadfefb3120, >>> odp_actions=odp_actions@entry=0x7fadfefb3190, wc=wc@entry=0x7fadfefb31d0) >>> at ofproto/ofproto-dpif-upcall.c:1206 >>> #9 0x00007fae029f3960 in recv_upcalls (handler=0x7fae03a4f5c8, >>> handler=0x7fae03a4f5c8) at ofproto/ofproto-dpif-upcall.c:778 >>> #10 0x00007fae029f3e1c in udpif_upcall_handler (arg=0x7fae03a4f5c8) at >>> ofproto/ofproto-dpif-upcall.c:696 >>> #11 0x00007fae02a7e596 in ovsthread_wrapper (aux_=<optimized out>) at >>> lib/ovs-thread.c:340 >>> #12 0x00007fae01d14dc5 in start_thread () from /usr/lib64/libpthread.so.0 >>> #13 0x00007fae0132271d in clone () from /usr/lib64/libc.so.6 >>> (gdb) p mbundle >>> $1 = (struct mbundle *) 0xcccccccccccccccc >>> >>> Program received signal SIGSEGV, Segmentation fault. >>> [Switching to Thread 0x7fadfefd1700 (LWP 3570)] >>> 0x00007fae029f68a0 in get_ofp_port (xbridge=xbridge@entry=0x7fae03a81680, >>> ofp_port=ofp_port@entry=1) at ofproto/ofproto-dpif-xlate.c:1400 >>> 1400 if (xport->ofp_port == ofp_port) { >>> (gdb) bt >>> #0 0x00007fae029f68a0 in get_ofp_port >>> (xbridge=xbridge@entry=0x7fae03a81680, ofp_port=ofp_port@entry=1) at >>> ofproto/ofproto-dpif-xlate.c:1400 >>> #1 0x00007fae029f692f in lookup_input_bundle (xbridge=0x7fae03a81680, >>> in_port=1, warn=<optimized out>, in_xportp=in_xportp@entry=0x7fadfef93e90) >>> at ofproto/ofproto-dpif-xlate.c:1550 >>> #2 0x00007fae029fba23 in xlate_normal (ctx=0x7fadfef94420) at >>> ofproto/ofproto-dpif-xlate.c:2339 >>> #3 xlate_output_action (ctx=ctx@entry=0x7fadfef94420, port=<optimized >>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at >>> ofproto/ofproto-dpif-xlate.c:3829 >>> #4 0x00007fae029f854f in do_xlate_actions >>> (ofpacts=ofpacts@entry=0x7fae03a7f0a8, ofpacts_len=ofpacts_len@entry=8, >>> ctx=ctx@entry=0x7fadfef94420) at ofproto/ofproto-dpif-xlate.c:4384 >>> #5 0x00007fae029fd6ec in xlate_actions (xin=xin@entry=0x7fadfef955c0, >>> xout=xout@entry=0x7fadfefb3178) at ofproto/ofproto-dpif-xlate.c:5272 >>> #6 0x00007fae029f16f6 in upcall_xlate (wc=0x7fadfefb31d0, >>> odp_actions=0x7fadfefb3190, upcall=0x7fadfefb3120, udpif=0x7fae03a4fb60) at >>> ofproto/ofproto-dpif-upcall.c:1068 >>> #7 process_upcall (udpif=udpif@entry=0x7fae03a4fb60, >>> upcall=upcall@entry=0x7fadfefb3120, >>> odp_actions=odp_actions@entry=0x7fadfefb3190, wc=wc@entry=0x7fadfefb31d0) >>> at ofproto/ofproto-dpif-upcall.c:1206 >>> #8 0x00007fae029f3960 in recv_upcalls (handler=0x7fae03a7e7b8, >>> handler=0x7fae03a7e7b8) at ofproto/ofproto-dpif-upcall.c:778 >>> #9 0x00007fae029f3e1c in udpif_upcall_handler (arg=0x7fae03a7e7b8) at >>> ofproto/ofproto-dpif-upcall.c:696 >>> #10 0x00007fae02a7e596 in ovsthread_wrapper (aux_=<optimized out>) at >>> lib/ovs-thread.c:340 >>> #11 0x00007fae01d14dc5 in start_thread () from /usr/lib64/libpthread.so.0 >>> #12 0x00007fae0132271d in clone () from /usr/lib64/libc.so.6 >>> (gdb) p xport >>> $1 = (struct xport *) 0x19 >>> >>>> From: nickcooper-zhangtonghao [mailto:n...@opencloud.tech] >>>> Sent: Saturday, February 04, 2017 11:48 PM >>>> To: wangyunjian <wangyunj...@huawei.com> >>>> Cc: b...@ovn.org; d...@openvswitch.org; caihe <ca...@huawei.com>; >>>> Huangjian (J) <huangjian.huangj...@huawei.com> >>>> Subject: Re: [ovs-dev] [BUG] upcall handler thread crash >>>> >>>> Hi, what’s the OvS version you tested. I didn’t get the crash with master >>>> version. >>>> The test script is described as below. >>>> >>>> ovs-vsctl add-br br0 >>>> ovs-vsctl add-port br0 eth1 >>>> for i in `seq 0 1000`; >>>> do >>>> ovs-vsctl add-port br0 eth2 >>>> ovs-vsctl del-port br0 eth2 >>>> done >>>> >>>> >>>> Thanks. >>>> Nick >>>> >>>> On Feb 4, 2017, at 7:21 PM, wangyunjian <wangyunj...@huawei.com> wrote: >>>> >>>> Recently, write a script add and delete port repeatly, ovs upcall handler >>>> thread crash with the following trace. >>>> In the code bellow, weather the operations of mbridge->mbundles hmap >>>> should with a lock to protect content between ovs-vswichd thread and the >>>> upcall handler thread: >>>> >>>> static struct mbundle * >>>> mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle) >>>> { >>>> struct mbundle *mbundle; >>>> >>>> HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0), >>>> &mbridge->mbundles) { >>>> if (mbundle->ofbundle == ofbundle) { >>>> return mbundle; >>>> } >>>> } >>>> return NULL; >>>> } >>>> >>>> Call Trace: >>>> #0 0x000000000044d838 in mbundle_lookup (mbridge=0x7fbf68000cc0, >>>> ofbundle=0x7fbf7007c3d0) at ofproto/ofproto_dpif_mirror.c:472 >>>> #1 0x000000000044da15 in mirror_bundle_out (mbridge=<optimized out>, >>>> ofbundle=<optimized out>) at ofproto/ofproto_dpif_mirror.c:192 >>>> #2 0x0000000000448658 in xbundle_mirror_out (xbridge=0x7fbf5c6468a0, >>>> xbundle=0x7fbf3d48a160) at ofproto/ofproto_dpif_xlate.c:1556 >>>> #3 xlate_normal_flood (ctx=ctx@entry=0x7fbf7729e3d0, >>>> in_xbundle=in_xbundle@entry=0x7fbf5c22f870, vlan=vlan@entry=100) at >>>> ofproto/ofproto_dpif_xlate.c:2525 >>>> #4 0x0000000000448e7e in xlate_normal (ctx=0x7fbf7729e3d0) at >>>> ofproto/ofproto_dpif_xlate.c:2724 >>>> #5 xlate_output_action (ctx=ctx@entry=0x7fbf7729e3d0, port=<optimized >>>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at >>>> ofproto/ofproto_dpif_xlate.c:4061 >>>> #6 0x0000000000445147 in do_xlate_actions (ofpacts=0x6eb7288, >>>> ofpacts_len=16, ctx=ctx@entry=0x7fbf7729e3d0) at >>>> ofproto/ofproto_dpif_xlate.c:4616 >>>> #7 0x0000000000446481 in xlate_recursively (rule=0x6eb7100, >>>> ctx=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:3445 >>>> #8 xlate_table_action (ctx=0x7fbf7729e3d0, in_port=<optimized out>, >>>> table_id=<optimized out>, may_packet_in=<optimized out>, >>>> honor_table_miss=<optimized out>) at ofproto/ofproto_dpif_xlate.c:3513 >>>> #9 0x00000000004475a2 in compose_output_action__ >>>> (ctx=ctx@entry=0x7fbf7729e3d0, ofp_port=<optimized out>, xr=<optimized >>>> out>, check_stp=check_stp@entry=true) at ofproto/ofproto_dpif_xlate.c:3206 >>>> #10 0x0000000000447bbf in compose_output_action (xr=<optimized out>, >>>> ofp_port=<optimized out>, ctx=0x7fbf7729e3d0) at >>>> ofproto/ofproto_dpif_xlate.c:3426 >>>> #11 output_normal (ctx=ctx@entry=0x7fbf7729e3d0, >>>> out_xbundle=out_xbundle@entry=0x7fbf5cdf4aa0, vlan=vlan@entry=0) at >>>> ofproto/ofproto_dpif_xlate.c:2073 >>>> #12 0x00000000004486ae in xlate_normal_flood >>>> (ctx=ctx@entry=0x7fbf7729e3d0, in_xbundle=in_xbundle@entry=0x7fbf5e1d44d0, >>>> vlan=vlan@entry=0) at ofproto/ofproto_dpif_xlate.c:2529 >>>> #13 0x0000000000448e7e in xlate_normal (ctx=0x7fbf7729e3d0) at >>>> ofproto/ofproto_dpif_xlate.c:2724 >>>> #14 xlate_output_action (ctx=ctx@entry=0x7fbf7729e3d0, port=<optimized >>>> out>, max_len=<optimized out>, may_packet_in=may_packet_in@entry=true) at >>>> ofproto/ofproto_dpif_xlate.c:4061 >>>> #15 0x0000000000445147 in do_xlate_actions >>>> (ofpacts=ofpacts@entry=0x7fbf70005ae8, ofpacts_len=ofpacts_len@entry=8, >>>> ctx=ctx@entry=0x7fbf7729e3d0) at ofproto/ofproto_dpif_xlate.c:4616 >>>> #16 0x000000000044a739 in xlate_actions (xin=xin@entry=0x7fbf7729f570, >>>> xout=xout@entry=0x7fbf772c0b18) at ofproto/ofproto_dpif_xlate.c:5509 >>>> #17 0x000000000043e4b6 in upcall_xlate (wc=0x7fbf772c0b70, >>>> odp_actions=0x7fbf772c0b30, upcall=0x7fbf772c0ac0, udpif=0x6e164a0) at >>>> ofproto/ofproto_dpif_upcall.c:1082 >>>> #18 process_upcall (udpif=udpif@entry=0x6e164a0, >>>> upcall=upcall@entry=0x7fbf772c0ac0, >>>> odp_actions=odp_actions@entry=0x7fbf772c0b30, wc=wc@entry=0x7fbf772c0b70) >>>> at ofproto/ofproto_dpif_upcall.c:1220 >>>> #19 0x00000000004407d3 in recv_upcalls (handler=0x7fbf58944810, >>>> handler=0x7fbf58944810) at ofproto/ofproto_dpif_upcall.c:784 >>>> #20 0x0000000000440cca in udpif_upcall_handler (arg=0x7fbf58944810) at >>>> ofproto/ofproto_dpif_upcall.c:701 >>>> #21 0x00000000004c95e4 in ovsthread_wrapper (aux_=<optimized out>) at >>>> lib/ovs_thread.c:649 >>>> #22 0x00007fbf7aeaedc5 in start_thread () from /usr/lib64/libpthread.so.0 >>>> #23 0x00007fbf79a5e71d in clone () from /usr/lib64/libc.so.6 >>>> >>>> #0 0x000000000044d838 in mbundle_lookup (mbridge=0x7fbf68000cc0, >>>> ofbundle=0x7fbf7007c3d0) >>>> 472 if (mbundle->ofbundle == ofbundle) { >>>> (gdb) p mbundle >>>> $1 = (struct mbundle *) 0x31 >>>> >>> _______________________________________________ >>> dev mailing list >>> d...@openvswitch.org >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev