Ben,

According to current implementation, There is no problem if rule A has a 
non-NULL struct rule_dpif's 'new_rule'  B, because there is rcu synchronization 
during bridge deletion, and ovsrcu_postone() is called twice by 
ofproto_destroy(), which could also help to make sure ofproto will not be 
released before release rule B (see function ofproto_destroy() and 
ofproto_destroy_defer__() for detail). 

But if rule A has a non-NULL 'new_rule'  B, and rule B has a non-NULL 
'new_rule' C also, there will be problem, because ofproto has been released 
when release rule C, but ofproto will be used when release a rule in function 
ofproto_rule_destroy__().

I copied the bt information below for reference.

 (gdb) bt
#0  0x00007f811052d157 in __GI_raise (sig=sig@entry=6) at 
../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1  0x00007f811052e848 in __GI_abort () at abort.c:90
#2  0x00000000006218a9 in PAT_abort ()
#3  0x000000000061ebbd in patchIllInsHandler ()
#4  <signal handler called>
#5  0x0000000000470169 in rule_destroy_cb (rule=0x7f80601ec280) at 
ofproto/ofproto.c:2851
#6  0x000000000052d546 in ovsrcu_call_postponed () at lib/ovs_rcu.c:323
#7  0x000000000052d714 in ovsrcu_postpone_thread (arg=<optimized out>) at 
lib/ovs_rcu.c:338
#8  0x000000000052fa81 in ovsthread_wrapper (aux_=<optimized out>) at 
lib/ovs_thread.c:651
#9  0x00007f8111a3edc5 in start_thread (arg=0x7f80c67fc700) at 
pthread_create.c:308
#10 0x00007f81105ef75d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:113
(gdb) f 5
#5  0x0000000000470169 in rule_destroy_cb (rule=0x7f80601ec280) at 
ofproto/ofproto.c:2851
2851        rule->ofproto->ofproto_class->rule_destruct(rule);



-----Original Message-----
From: Ben Pfaff [mailto:[email protected]] 
Sent: 2018年1月9日 5:05
To: Zhanghaibo (Euler) <[email protected]>
Cc: [email protected]; Chengwentao (Vintorcheng) <[email protected]>
Subject: Re: [ovs-dev] [PATCH] ofproto:fix abort issue when delete bridge

This seems like the wrong approach to me.  If there's a problem that struct 
rule_dpif's 'new_rule' member can sometimes point to a rule that gets freed, 
then one would normally fix that through some kind of synchronization between 
the rules (for example, one could make sure that RCU keeps ->new_rule from 
being freed while it is still in use), not by adding a refcount on a data 
structure multiple levels higher.

On Mon, Jan 08, 2018 at 11:09:58AM +0800, Haibo Zhang wrote:
> When delete a birdge, all the rule of the bridge wil be removed. When 
> destruct a rule, it is possible that the rule has a non-NULL new_rule A, and 
> the new_rule A might has a non-NULL new_rule B, and the new_rule B might has 
> a non-NULL new_rule C... in this case, the ofproto has been freed before rule 
> B or C was freed, and it will cause crash issue when free rule B or C using 
> rcu mechanism. To fix the issue, a reference count is introduced to ofproto 
> to make sure all rules of the ofproto were freed completely before the 
> ofproto was freed.
> 
> Signed-off-by: Haibo Zhang <[email protected]>
> ---
>  ofproto/ofproto-dpif.c     | 14 ++++++++++++--
>  ofproto/ofproto-provider.h |  9 ++++++++-
>  ofproto/ofproto.c          | 26 ++++++++++++++++----------
>  3 files changed, 36 insertions(+), 13 deletions(-)
> 
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 
> 43b7b89..968a51a 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4254,16 +4254,26 @@ static struct rule_dpif *rule_dpif_cast(const 
> struct rule *rule)  }
>  
>  static struct rule *
> -rule_alloc(void)
> +rule_alloc(struct ofproto * ofproto)
>  {
> +    struct rule * rule_;
>      struct rule_dpif *rule = xzalloc(sizeof *rule);
> -    return &rule->up;
> +
> +    if (OVS_UNLIKELY(!rule)) {
> +        return NULL;
> +    }
> +
> +    rule_ = &rule->up;
> +    *CONST_CAST(struct ofproto **, &rule_->ofproto) = ofproto;
> +    ofproto_ref(ofproto);
> +    return rule_;
>  }
>  
>  static void
>  rule_dealloc(struct rule *rule_)
>  {
>      struct rule_dpif *rule = rule_dpif_cast(rule_);
> +    ofproto_unref(rule_->ofproto);
>      free(rule);
>  }
>  
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h 
> index 9dc73c4..ce3e0f7 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -133,6 +133,11 @@ struct ofproto {
>      /* Variable length mf_field mapping. Stores all configured variable 
> length
>       * meta-flow fields (struct mf_field) in a switch. */
>      struct vl_mff_map vl_mff_map;
> +    /* Number of references.
> +     * bridge keep one reference
> +     * Any rule trying to keep ofproto from being freed should hold its own
> +     * reference. */
> +    struct ovs_refcount ref_count;
>  };
>  
>  void ofproto_init_tables(struct ofproto *, int n_tables); @@ -433,6 
> +438,8 @@ struct rule {  void ofproto_rule_ref(struct rule *);  bool 
> ofproto_rule_try_ref(struct rule *);  void ofproto_rule_unref(struct 
> rule *);
> +void ofproto_ref(struct ofproto *);
> +void ofproto_unref(struct ofproto *);
>  
>  static inline const struct rule_actions * rule_get_actions(const 
> struct rule *);  static inline bool rule_is_table_miss(const struct 
> rule *); @@ -1288,7 +1295,7 @@ struct ofproto_class {
>       * ->rule_destruct() must uninitialize derived state.
>       *
>       * Rule destruction must not fail. */
> -    struct rule *(*rule_alloc)(void);
> +    struct rule *(*rule_alloc)(struct ofproto *);
>      enum ofperr (*rule_construct)(struct rule *rule)
>          /* OVS_REQUIRES(ofproto_mutex) */;
>      void (*rule_insert)(struct rule *rule, struct rule *old_rule, 
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 
> 84eb18e..d68de6b 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -523,6 +523,7 @@ ofproto_create(const char *datapath_name, const char 
> *datapath_type,
>      ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
>      ofproto->min_mtu = INT_MAX;
>      cmap_init(&ofproto->groups);
> +    ovs_refcount_init(&ofproto->ref_count);
>      ovs_mutex_unlock(&ofproto_mutex);
>      ofproto->ogf.types = 0xf;
>      ofproto->ogf.capabilities = OFPGFC_CHAINING | 
> OFPGFC_SELECT_LIVENESS | @@ -1616,14 +1617,20 @@ ofproto_destroy__(struct 
> ofproto *ofproto)
>      ofproto->ofproto_class->dealloc(ofproto);
>  }
>  
> -/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
> - * - 1st we defer the removal of the rules from the classifier
> - * - 2nd we defer the actual destruction of the rules. */ -static 
> void -ofproto_destroy_defer__(struct ofproto *ofproto)
> -    OVS_EXCLUDED(ofproto_mutex)
> +void
> +ofproto_ref(struct ofproto *ofproto)
>  {
> -    ovsrcu_postpone(ofproto_destroy__, ofproto);
> +    if (ofproto) {
> +        ovs_refcount_ref(&ofproto->ref_count);
> +    }
> +}
> +
> +void
> +ofproto_unref(struct ofproto *ofproto) {
> +    if (ofproto && ovs_refcount_unref_relaxed(&ofproto->ref_count) == 1) {
> +        ovsrcu_postpone(ofproto_destroy__, ofproto);
> +    }
>  }
>  
>  void
> @@ -1660,8 +1667,7 @@ ofproto_destroy(struct ofproto *p, bool del)
>      p->connmgr = NULL;
>      ovs_mutex_unlock(&ofproto_mutex);
>  
> -    /* Destroying rules is deferred, must have 'ofproto' around for them. */
> -    ovsrcu_postpone(ofproto_destroy_defer__, p);
> +    ofproto_unref(p);
>  }
>  
>  /* Destroys the datapath with the respective 'name' and 'type'.  With 
> the Linux @@ -4888,7 +4894,7 @@ ofproto_rule_create(struct ofproto *ofproto, 
> struct cls_rule *cr,
>      enum ofperr error;
>  
>      /* Allocate new rule. */
> -    rule = ofproto->ofproto_class->rule_alloc();
> +    rule = ofproto->ofproto_class->rule_alloc(ofproto);
>      if (!rule) {
>          cls_rule_destroy(cr);
>          VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", 
> ofproto->name);
> --
> 1.8.3.1
> 
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to