From hepeng:
https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/#2487473

also from guohongzhi <[email protected]>:
http://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/

also from a discussion about the mixing use of RCU and refcount in the mail
list with Ilya Maximets, William Tu, Ben Pfaf, and Gaëtan Rivet.

A summary, as quoted from Ilya:

"
RCU for ofproto was introduced for one
and only one reason - to avoid freeing ofproto while rules are still
alive.  This was done in commit f416c8d61601 ("ofproto: RCU postpone
rule destruction.").  The goal was to allow using rules without
refcounting them within a single grace period.  And that forced us
to postpone destruction of the ofproto for a single grace period.
Later commit 39c9459355b6 ("Use classifier versioning.") made it
possible for rules to be alive for more than one grace period, so
the commit made ofproto wait for 2 grace periods by double postponing.
As we can see now, that wasn't enough and we have to wait for more
than 2 grace periods in certain cases.
"

In a short, the ofproto should have a longer life time than rule, if
the rule lasts for more than 2 grace periods, the ofproto should live
longer to ensure rule->ofproto is valid. It's hard to predict how long
a ofproto should live, thus we need to use refcount on ofproto to make
things easy. The controversial part is that we have already used RCU postpone
to delay ofproto destrution, if we have to add refcount, is it simpler to
use just refcount without RCU postpone?

IMO, I think going back to the pure refcount solution is more
complicated than mixing using both.

Gaëtan Rive asks some questions on guohongzhi's v2 patch:

during ofproto_rule_create, should we use ofproto_ref
or ofproto_try_ref? how can we make sure the ofproto is alive?

by using RCU, I think the answer is that, when you see a pointer to
ofproto, it's alive at least in this grace peroid, so it is ok to use
ofproto_ref instead of ofproto_try_ref. This shows that by mixing use
of RCU and refcount we can save a lot of work to query if ofproto is
alive.

In short, the RCU part makes sure the ofproto is alive when we use it,
and the refcount part makes sure it lives longer enough.

Also regarding a new patch filed recently, people are now making use
of RCU to protect ofproto:

https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/

In this patch, I have merged guohongzhi's patch and mine, and fixes
accoring to the previous comments.

Signed-off-by: Peng He <[email protected]>
Signed-off-by: guohongzhi <[email protected]>
---
 ofproto/ofproto-dpif-xlate-cache.c |  1 +
 ofproto/ofproto-dpif-xlate.c       |  1 +
 ofproto/ofproto-dpif.c             |  4 +--
 ofproto/ofproto-provider.h         |  2 ++
 ofproto/ofproto.c                  | 45 ++++++++++++++++++++++++++----
 ofproto/ofproto.h                  |  3 ++
 6 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
b/ofproto/ofproto-dpif-xlate-cache.c
index dcc91cb38..0deee365d 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
 {
     switch (entry->type) {
     case XC_TABLE:
+        ofproto_unref(&(entry->table.ofproto->up));
         break;
     case XC_RULE:
         ofproto_rule_unref(&entry->rule->up);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 6fb59e170..0380928a9 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3025,6 +3025,7 @@ xlate_normal(struct xlate_ctx *ctx)
 
         /* Save just enough info to update mac learning table later. */
         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
+        ofproto_ref(&ctx->xbridge->ofproto->up);
         entry->normal.ofproto = ctx->xbridge->ofproto;
         entry->normal.in_port = flow->in_port.ofp_port;
         entry->normal.dl_src = flow->dl_src;
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 8143dd965..6816896dd 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4472,8 +4472,8 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
             }
             if (xcache) {
                 struct xc_entry *entry;
-
                 entry = xlate_cache_add_entry(xcache, XC_TABLE);
+                ofproto_ref(&ofproto->up);
                 entry->table.ofproto = ofproto;
                 entry->table.id = *table_id;
                 entry->table.match = true;
@@ -4508,8 +4508,8 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
         }
         if (xcache) {
             struct xc_entry *entry;
-
             entry = xlate_cache_add_entry(xcache, XC_TABLE);
+            ofproto_ref(&ofproto->up);
             entry->table.ofproto = ofproto;
             entry->table.id = next_id;
             entry->table.match = (rule != NULL);
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 14b909973..7d07bef01 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -143,6 +143,8 @@ struct ofproto {
     /* Variable length mf_field mapping. Stores all configured variable length
      * meta-flow fields (struct mf_field) in a switch. */
     struct vl_mff_map vl_mff_map;
+
+    struct ovs_refcount refcount;
 };
 
 void ofproto_init_tables(struct ofproto *, int n_tables);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 56aeac720..3cb1a2162 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -549,6 +549,7 @@ ofproto_create(const char *datapath_name, const char 
*datapath_type,
 
     ovs_mutex_init(&ofproto->vl_mff_map.mutex);
     cmap_init(&ofproto->vl_mff_map.cmap);
+    ovs_refcount_init(&ofproto->refcount);
 
     error = ofproto->ofproto_class->construct(ofproto);
     if (error) {
@@ -1695,9 +1696,24 @@ ofproto_destroy__(struct ofproto *ofproto)
     ofproto->ofproto_class->dealloc(ofproto);
 }
 
-/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
- * - 1st we defer the removal of the rules from the classifier
- * - 2nd we defer the actual destruction of the rules. */
+/* We used to use defer function to wait for two grace periods
+ * as we assume the rule that holds the ofproto pointer will
+ * live at most two grace period. Howvever, we found at certain
+ * cases, this assumption does not stand.
+ *
+ * destroying a rule may have to wait multiple grace periods:
+ * remove_rules_postponed (one grace period)
+ *       -> remove_rule_rcu
+ *           -> remove_rule_rcu__
+ *               -> ofproto_rule_unref -> ref count != 1
+ *                   -> ... more grace periods.
+ *                   -> rule_destroy_cb (> 2 grace periods)
+ *                       -> free
+ *
+ * So we have to check the refcount for sure all the rules
+ * have been destroyed.
+ *
+ */
 static void
 ofproto_destroy_defer__(struct ofproto *ofproto)
     OVS_EXCLUDED(ofproto_mutex)
@@ -1705,6 +1721,20 @@ ofproto_destroy_defer__(struct ofproto *ofproto)
     ovsrcu_postpone(ofproto_destroy__, ofproto);
 }
 
+void
+ofproto_ref(struct ofproto *ofproto)
+{
+    ovs_refcount_ref(&ofproto->refcount);
+}
+
+void
+ofproto_unref(struct ofproto *ofproto)
+{
+    if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) {
+        ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
+    }
+}
+
 void
 ofproto_destroy(struct ofproto *p, bool del)
     OVS_EXCLUDED(ofproto_mutex)
@@ -1736,8 +1766,7 @@ ofproto_destroy(struct ofproto *p, bool del)
     p->connmgr = NULL;
     ovs_mutex_unlock(&ofproto_mutex);
 
-    /* Destroying rules is deferred, must have 'ofproto' around for them. */
-    ovsrcu_postpone(ofproto_destroy_defer__, p);
+    ofproto_unref(p);
 }
 
 /* Destroys the datapath with the respective 'name' and 'type'.  With the Linux
@@ -2930,6 +2959,7 @@ ofproto_rule_destroy__(struct rule *rule)
     rule_actions_destroy(rule_get_actions(rule));
     ovs_mutex_destroy(&rule->mutex);
     rule->ofproto->ofproto_class->rule_dealloc(rule);
+    ofproto_unref(rule->ofproto);
 }
 
 static void
@@ -3070,6 +3100,7 @@ group_destroy_cb(struct ofgroup *group)
     ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
                                            &group->buckets));
     group->ofproto->ofproto_class->group_dealloc(group);
+    ofproto_unref(group->ofproto);
 }
 
 void
@@ -5281,6 +5312,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct 
cls_rule *cr,
 
     /* Initialize base state. */
     *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto;
+    ofproto_ref(ofproto);
     cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr);
     ovs_refcount_init(&rule->ref_count);
 
@@ -7378,6 +7410,9 @@ init_group(struct ofproto *ofproto, const struct 
ofputil_group_mod *gm,
         ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
                                                &(*ofgroup)->buckets));
         ofproto->ofproto_class->group_dealloc(*ofgroup);
+    } else {
+        /* group construct succeed, ref ofproto */
+        ofproto_ref(ofproto);
     }
     return error;
 }
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index b0262da2d..84770113f 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -563,6 +563,9 @@ int ofproto_port_get_cfm_status(const struct ofproto *,
 enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *,
                                                       uint8_t table_id);
 
+void ofproto_ref(struct ofproto *);
+void ofproto_unref(struct ofproto *);
+
 #ifdef  __cplusplus
 }
 #endif
-- 
2.25.1

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to