Ben, According to current implementation, There is no problem if rule A has a non-NULL struct rule_dpif's 'new_rule' B, because there is rcu synchronization during bridge deletion, and ovsrcu_postone() is called twice by ofproto_destroy(), which could also help to make sure ofproto will not be released before release rule B (see function ofproto_destroy() and ofproto_destroy_defer__() for detail).
But if rule A has a non-NULL 'new_rule' B, and rule B has a non-NULL 'new_rule' C also, there will be problem, because ofproto has been released when release rule C, but ofproto will be used when release a rule in function ofproto_rule_destroy__(). I copied the bt information below for reference. (gdb) bt #0 0x00007f811052d157 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 #1 0x00007f811052e848 in __GI_abort () at abort.c:90 #2 0x00000000006218a9 in PAT_abort () #3 0x000000000061ebbd in patchIllInsHandler () #4 <signal handler called> #5 0x0000000000470169 in rule_destroy_cb (rule=0x7f80601ec280) at ofproto/ofproto.c:2851 #6 0x000000000052d546 in ovsrcu_call_postponed () at lib/ovs_rcu.c:323 #7 0x000000000052d714 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs_rcu.c:338 #8 0x000000000052fa81 in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs_thread.c:651 #9 0x00007f8111a3edc5 in start_thread (arg=0x7f80c67fc700) at pthread_create.c:308 #10 0x00007f81105ef75d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 (gdb) f 5 #5 0x0000000000470169 in rule_destroy_cb (rule=0x7f80601ec280) at ofproto/ofproto.c:2851 2851 rule->ofproto->ofproto_class->rule_destruct(rule); -----Original Message----- From: Ben Pfaff [mailto:[email protected]] Sent: 2018年1月9日 5:05 To: Zhanghaibo (Euler) <[email protected]> Cc: [email protected]; Chengwentao (Vintorcheng) <[email protected]> Subject: Re: [ovs-dev] [PATCH] ofproto:fix abort issue when delete bridge This seems like the wrong approach to me. If there's a problem that struct rule_dpif's 'new_rule' member can sometimes point to a rule that gets freed, then one would normally fix that through some kind of synchronization between the rules (for example, one could make sure that RCU keeps ->new_rule from being freed while it is still in use), not by adding a refcount on a data structure multiple levels higher. On Mon, Jan 08, 2018 at 11:09:58AM +0800, Haibo Zhang wrote: > When delete a birdge, all the rule of the bridge wil be removed. When > destruct a rule, it is possible that the rule has a non-NULL new_rule A, and > the new_rule A might has a non-NULL new_rule B, and the new_rule B might has > a non-NULL new_rule C... in this case, the ofproto has been freed before rule > B or C was freed, and it will cause crash issue when free rule B or C using > rcu mechanism. To fix the issue, a reference count is introduced to ofproto > to make sure all rules of the ofproto were freed completely before the > ofproto was freed. > > Signed-off-by: Haibo Zhang <[email protected]> > --- > ofproto/ofproto-dpif.c | 14 ++++++++++++-- > ofproto/ofproto-provider.h | 9 ++++++++- > ofproto/ofproto.c | 26 ++++++++++++++++---------- > 3 files changed, 36 insertions(+), 13 deletions(-) > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index > 43b7b89..968a51a 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -4254,16 +4254,26 @@ static struct rule_dpif *rule_dpif_cast(const > struct rule *rule) } > > static struct rule * > -rule_alloc(void) > +rule_alloc(struct ofproto * ofproto) > { > + struct rule * rule_; > struct rule_dpif *rule = xzalloc(sizeof *rule); > - return &rule->up; > + > + if (OVS_UNLIKELY(!rule)) { > + return NULL; > + } > + > + rule_ = &rule->up; > + *CONST_CAST(struct ofproto **, &rule_->ofproto) = ofproto; > + ofproto_ref(ofproto); > + return rule_; > } > > static void > rule_dealloc(struct rule *rule_) > { > struct rule_dpif *rule = rule_dpif_cast(rule_); > + ofproto_unref(rule_->ofproto); > free(rule); > } > > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h > index 9dc73c4..ce3e0f7 100644 > --- a/ofproto/ofproto-provider.h > +++ b/ofproto/ofproto-provider.h > @@ -133,6 +133,11 @@ struct ofproto { > /* Variable length mf_field mapping. Stores all configured variable > length > * meta-flow fields (struct mf_field) in a switch. */ > struct vl_mff_map vl_mff_map; > + /* Number of references. > + * bridge keep one reference > + * Any rule trying to keep ofproto from being freed should hold its own > + * reference. */ > + struct ovs_refcount ref_count; > }; > > void ofproto_init_tables(struct ofproto *, int n_tables); @@ -433,6 > +438,8 @@ struct rule { void ofproto_rule_ref(struct rule *); bool > ofproto_rule_try_ref(struct rule *); void ofproto_rule_unref(struct > rule *); > +void ofproto_ref(struct ofproto *); > +void ofproto_unref(struct ofproto *); > > static inline const struct rule_actions * rule_get_actions(const > struct rule *); static inline bool rule_is_table_miss(const struct > rule *); @@ -1288,7 +1295,7 @@ struct ofproto_class { > * ->rule_destruct() must uninitialize derived state. > * > * Rule destruction must not fail. */ > - struct rule *(*rule_alloc)(void); > + struct rule *(*rule_alloc)(struct ofproto *); > enum ofperr (*rule_construct)(struct rule *rule) > /* OVS_REQUIRES(ofproto_mutex) */; > void (*rule_insert)(struct rule *rule, struct rule *old_rule, > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index > 84eb18e..d68de6b 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -523,6 +523,7 @@ ofproto_create(const char *datapath_name, const char > *datapath_type, > ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name); > ofproto->min_mtu = INT_MAX; > cmap_init(&ofproto->groups); > + ovs_refcount_init(&ofproto->ref_count); > ovs_mutex_unlock(&ofproto_mutex); > ofproto->ogf.types = 0xf; > ofproto->ogf.capabilities = OFPGFC_CHAINING | > OFPGFC_SELECT_LIVENESS | @@ -1616,14 +1617,20 @@ ofproto_destroy__(struct > ofproto *ofproto) > ofproto->ofproto_class->dealloc(ofproto); > } > > -/* Destroying rules is doubly deferred, must have 'ofproto' around for them. > - * - 1st we defer the removal of the rules from the classifier > - * - 2nd we defer the actual destruction of the rules. */ -static > void -ofproto_destroy_defer__(struct ofproto *ofproto) > - OVS_EXCLUDED(ofproto_mutex) > +void > +ofproto_ref(struct ofproto *ofproto) > { > - ovsrcu_postpone(ofproto_destroy__, ofproto); > + if (ofproto) { > + ovs_refcount_ref(&ofproto->ref_count); > + } > +} > + > +void > +ofproto_unref(struct ofproto *ofproto) { > + if (ofproto && ovs_refcount_unref_relaxed(&ofproto->ref_count) == 1) { > + ovsrcu_postpone(ofproto_destroy__, ofproto); > + } > } > > void > @@ -1660,8 +1667,7 @@ ofproto_destroy(struct ofproto *p, bool del) > p->connmgr = NULL; > ovs_mutex_unlock(&ofproto_mutex); > > - /* Destroying rules is deferred, must have 'ofproto' around for them. */ > - ovsrcu_postpone(ofproto_destroy_defer__, p); > + ofproto_unref(p); > } > > /* Destroys the datapath with the respective 'name' and 'type'. With > the Linux @@ -4888,7 +4894,7 @@ ofproto_rule_create(struct ofproto *ofproto, > struct cls_rule *cr, > enum ofperr error; > > /* Allocate new rule. */ > - rule = ofproto->ofproto_class->rule_alloc(); > + rule = ofproto->ofproto_class->rule_alloc(ofproto); > if (!rule) { > cls_rule_destroy(cr); > VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", > ofproto->name); > -- > 1.8.3.1 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
