Ah,
found a bug, will submit a v4.

Peng He <[email protected]> 于2022年1月18日周二 21:08写道:

> From hepeng:
>
> https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/#2487473
>
> also from guohongzhi <[email protected]>:
>
> http://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/
>
> also from a discussion about the mixing use of RCU and refcount in the mail
> list with Ilya Maximets, William Tu, Ben Pfaf, and Gaëtan Rivet.
>
> A summary, as quoted from Ilya:
>
> "
> RCU for ofproto was introduced for one
> and only one reason - to avoid freeing ofproto while rules are still
> alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
> rule destruction.").  The goal was to allow using rules without
> refcounting them within a single grace period.  And that forced us
> to postpone destruction of the ofproto for a single grace period.
> Later commit 39c9459355b6 ("Use classifier versioning.") made it
> possible for rules to be alive for more than one grace period, so
> the commit made ofproto wait for 2 grace periods by double postponing.
> As we can see now, that wasn't enough and we have to wait for more
> than 2 grace periods in certain cases.
> "
>
> In a short, the ofproto should have a longer life time than rule, if
> the rule lasts for more than 2 grace periods, the ofproto should live
> longer to ensure rule->ofproto is valid. It's hard to predict how long
> a ofproto should live, thus we need to use refcount on ofproto to make
> things easy. The controversial part is that we have already used RCU
> postpone
> to delay ofproto destrution, if we have to add refcount, is it simpler to
> use just refcount without RCU postpone?
>
> IMO, I think going back to the pure refcount solution is more
> complicated than mixing using both.
>
> Gaëtan Rive asks some questions on guohongzhi's v2 patch:
>
> during ofproto_rule_create, should we use ofproto_ref
> or ofproto_try_ref? how can we make sure the ofproto is alive?
>
> by using RCU, I think the answer is that, when you see a pointer to
> ofproto, it's alive at least in this grace peroid, so it is ok to use
> ofproto_ref instead of ofproto_try_ref. This shows that by mixing use
> of RCU and refcount we can save a lot of work to query if ofproto is
> alive.
>
> In short, the RCU part makes sure the ofproto is alive when we use it,
> and the refcount part makes sure it lives longer enough.
>
> Also regarding a new patch filed recently, people are now making use
> of RCU to protect ofproto:
>
>
> https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/
>
> In this patch, I have merged guohongzhi's patch and mine, and fixes
> accoring to the previous comments.
>
> Signed-off-by: Peng He <[email protected]>
> Signed-off-by: guohongzhi <[email protected]>
> ---
>  ofproto/ofproto-dpif-xlate-cache.c |  1 +
>  ofproto/ofproto-dpif-xlate.c       |  1 +
>  ofproto/ofproto-dpif.c             |  4 +--
>  ofproto/ofproto-provider.h         |  2 ++
>  ofproto/ofproto.c                  | 45 ++++++++++++++++++++++++++----
>  ofproto/ofproto.h                  |  3 ++
>  6 files changed, 49 insertions(+), 7 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif-xlate-cache.c
> b/ofproto/ofproto-dpif-xlate-cache.c
> index dcc91cb38..0deee365d 100644
> --- a/ofproto/ofproto-dpif-xlate-cache.c
> +++ b/ofproto/ofproto-dpif-xlate-cache.c
> @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
>  {
>      switch (entry->type) {
>      case XC_TABLE:
> +        ofproto_unref(&(entry->table.ofproto->up));
>          break;
>      case XC_RULE:
>          ofproto_rule_unref(&entry->rule->up);
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 6fb59e170..0380928a9 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -3025,6 +3025,7 @@ xlate_normal(struct xlate_ctx *ctx)
>
>          /* Save just enough info to update mac learning table later. */
>          entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
> +        ofproto_ref(&ctx->xbridge->ofproto->up);
>          entry->normal.ofproto = ctx->xbridge->ofproto;
>          entry->normal.in_port = flow->in_port.ofp_port;
>          entry->normal.dl_src = flow->dl_src;
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index 8143dd965..6816896dd 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4472,8 +4472,8 @@ rule_dpif_lookup_from_table(struct ofproto_dpif
> *ofproto,
>              }
>              if (xcache) {
>                  struct xc_entry *entry;
> -
>                  entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +                ofproto_ref(&ofproto->up);
>                  entry->table.ofproto = ofproto;
>                  entry->table.id = *table_id;
>                  entry->table.match = true;
> @@ -4508,8 +4508,8 @@ rule_dpif_lookup_from_table(struct ofproto_dpif
> *ofproto,
>          }
>          if (xcache) {
>              struct xc_entry *entry;
> -
>              entry = xlate_cache_add_entry(xcache, XC_TABLE);
> +            ofproto_ref(&ofproto->up);
>              entry->table.ofproto = ofproto;
>              entry->table.id = next_id;
>              entry->table.match = (rule != NULL);
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 14b909973..7d07bef01 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -143,6 +143,8 @@ struct ofproto {
>      /* Variable length mf_field mapping. Stores all configured variable
> length
>       * meta-flow fields (struct mf_field) in a switch. */
>      struct vl_mff_map vl_mff_map;
> +
> +    struct ovs_refcount refcount;
>  };
>
>  void ofproto_init_tables(struct ofproto *, int n_tables);
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 56aeac720..3cb1a2162 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -549,6 +549,7 @@ ofproto_create(const char *datapath_name, const char
> *datapath_type,
>
>      ovs_mutex_init(&ofproto->vl_mff_map.mutex);
>      cmap_init(&ofproto->vl_mff_map.cmap);
> +    ovs_refcount_init(&ofproto->refcount);
>
>      error = ofproto->ofproto_class->construct(ofproto);
>      if (error) {
> @@ -1695,9 +1696,24 @@ ofproto_destroy__(struct ofproto *ofproto)
>      ofproto->ofproto_class->dealloc(ofproto);
>  }
>
> -/* Destroying rules is doubly deferred, must have 'ofproto' around for
> them.
> - * - 1st we defer the removal of the rules from the classifier
> - * - 2nd we defer the actual destruction of the rules. */
> +/* We used to use defer function to wait for two grace periods
> + * as we assume the rule that holds the ofproto pointer will
> + * live at most two grace period. Howvever, we found at certain
> + * cases, this assumption does not stand.
> + *
> + * destroying a rule may have to wait multiple grace periods:
> + * remove_rules_postponed (one grace period)
> + *       -> remove_rule_rcu
> + *           -> remove_rule_rcu__
> + *               -> ofproto_rule_unref -> ref count != 1
> + *                   -> ... more grace periods.
> + *                   -> rule_destroy_cb (> 2 grace periods)
> + *                       -> free
> + *
> + * So we have to check the refcount for sure all the rules
> + * have been destroyed.
> + *
> + */
>  static void
>  ofproto_destroy_defer__(struct ofproto *ofproto)
>      OVS_EXCLUDED(ofproto_mutex)
> @@ -1705,6 +1721,20 @@ ofproto_destroy_defer__(struct ofproto *ofproto)
>      ovsrcu_postpone(ofproto_destroy__, ofproto);
>  }
>
> +void
> +ofproto_ref(struct ofproto *ofproto)
> +{
> +    ovs_refcount_ref(&ofproto->refcount);
> +}
> +
> +void
> +ofproto_unref(struct ofproto *ofproto)
> +{
> +    if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) {
> +        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
> +    }
> +}
> +
>  void
>  ofproto_destroy(struct ofproto *p, bool del)
>      OVS_EXCLUDED(ofproto_mutex)
> @@ -1736,8 +1766,7 @@ ofproto_destroy(struct ofproto *p, bool del)
>      p->connmgr = NULL;
>      ovs_mutex_unlock(&ofproto_mutex);
>
> -    /* Destroying rules is deferred, must have 'ofproto' around for them.
> */
> -    ovsrcu_postpone(ofproto_destroy_defer__, p);
> +    ofproto_unref(p);
>  }
>
>  /* Destroys the datapath with the respective 'name' and 'type'.  With the
> Linux
> @@ -2930,6 +2959,7 @@ ofproto_rule_destroy__(struct rule *rule)
>      rule_actions_destroy(rule_get_actions(rule));
>      ovs_mutex_destroy(&rule->mutex);
>      rule->ofproto->ofproto_class->rule_dealloc(rule);
> +    ofproto_unref(rule->ofproto);
>  }
>
>  static void
> @@ -3070,6 +3100,7 @@ group_destroy_cb(struct ofgroup *group)
>      ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
>                                             &group->buckets));
>      group->ofproto->ofproto_class->group_dealloc(group);
> +    ofproto_unref(group->ofproto);
>  }
>
>  void
> @@ -5281,6 +5312,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct
> cls_rule *cr,
>
>      /* Initialize base state. */
>      *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
> +    ofproto_ref(ofproto);
>      cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
>      ovs_refcount_init(&rule->ref_count);
>
> @@ -7378,6 +7410,9 @@ init_group(struct ofproto *ofproto, const struct
> ofputil_group_mod *gm,
>          ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
>                                                 &(*ofgroup)->buckets));
>          ofproto->ofproto_class->group_dealloc(*ofgroup);
> +    } else {
> +        /* group construct succeed, ref ofproto */
> +        ofproto_ref(ofproto);
>      }
>      return error;
>  }
> diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
> index b0262da2d..84770113f 100644
> --- a/ofproto/ofproto.h
> +++ b/ofproto/ofproto.h
> @@ -563,6 +563,9 @@ int ofproto_port_get_cfm_status(const struct ofproto *,
>  enum ofputil_table_miss ofproto_table_get_miss_config(const struct
> ofproto *,
>                                                        uint8_t table_id);
>
> +void ofproto_ref(struct ofproto *);
> +void ofproto_unref(struct ofproto *);
> +
>  #ifdef  __cplusplus
>  }
>  #endif
> --
> 2.25.1
>
>

-- 
hepeng
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to