From: Peng He <hepeng.0...@bytedance.com> The call stack of rule_destroy_cb:
remove_rules_postponed (one grace period) -> remove_rule_rcu -> remove_rule_rcu__ -> ofproto_rule_unref -> ref count != 1 -> ... more grace periods. -> rule_destroy_cb (> 2 grace periods) -> free Currently ofproto_destory waits only two grace periods, this means when rule_destroy_cb is called, the rule->ofproto might be a dangling pointer. This patch adds a refcount for ofproto to ensure ofproto exists when it is needed to call free functions. This patch fixes the crashes found: Program terminated with signal SIGSEGV, Segmentation fault. 0 0x0000562a55169e49 in rule_destroy_cb (rule=0x562a598be2c0) at ofproto/ofproto.c:2956 1 0x0000562a552623d6 in ovsrcu_call_postponed () at lib/ovs-rcu.c:348 2 0x0000562a55262504 in ovsrcu_postpone_thread (arg=<optimized out>) at lib/ovs-rcu.c:364 3 0x0000562a55264aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383 4 0x00007febe715a4a4 in start_thread (arg=0x7febe4c1c700) at pthread_create.c:456 5 0x00007febe6990d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97 (gdb) p rule->ofproto->ofproto_class $3 = (const struct ofproto_class *) 0x0 Also: 0 ofproto_dpif_credit_table_stats (ofproto=0x5583583332b0, table_id=0 '\000', n_matches=5, n_misses=0) at ofproto/ofproto-dpif.c:4310 1 0x0000558354c68514 in xlate_push_stats_entry (entry=entry@entry=0x7ff488031398, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:99 2 0x0000558354c686f3 in xlate_push_stats (xcache=<optimized out>, stats=stats@entry=0x7ff49af30b60) at ofproto/ofproto-dpif-xlate-cache.c:181 3 0x0000558354c5411a in revalidate_ukey (udpif=udpif@entry=0x5583583baba0, ukey=ukey@entry=0x7ff47809f770, stats=stats@entry=0x7ff49af32128, odp_actions=odp_actions@entry=0x7ff49af30c50, reval_seq=reval_seq@entry=275670456, recircs=recircs@entry=0x7ff49af30cd0) at ofproto/ofproto-dpif-upcall.c:2292 4 0x0000558354c57cbc in revalidate (revalidator=<optimized out>) at ofproto/ofproto-dpif-upcall.c:2681 5 0x0000558354c57f8e in udpif_revalidator (arg=0x5583583d2a90) at ofproto/ofproto-dpif-upcall.c:934 6 0x0000558354d24aef in ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:383 7 0x00007ff4a479d4a4 in start_thread (arg=0x7ff49af35700) at pthread_create.c:456 8 0x00007ff4a3fd3d0f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97 (gdb) p ofproto->up.ofproto_class $3 = (const struct ofproto_class *) 0x0 Signed-off-by: Peng He <hepeng.0...@bytedance.com> --- ofproto/ofproto-dpif-xlate-cache.c | 1 + ofproto/ofproto-dpif.c | 22 +++++++++-------- ofproto/ofproto-provider.h | 2 ++ ofproto/ofproto.c | 39 +++++++++++++++++++++++++++--- ofproto/ofproto.h | 4 +++ 5 files changed, 54 insertions(+), 14 deletions(-) diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c index dcc91cb38..0deee365d 100644 --- a/ofproto/ofproto-dpif-xlate-cache.c +++ b/ofproto/ofproto-dpif-xlate-cache.c @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) { switch (entry->type) { case XC_TABLE: + ofproto_unref(&(entry->table.ofproto->up)); break; case XC_RULE: ofproto_rule_unref(&entry->rule->up); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 4f0638f23..6480b3c78 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -4414,11 +4414,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, } if (xcache) { struct xc_entry *entry; - - entry = xlate_cache_add_entry(xcache, XC_TABLE); - entry->table.ofproto = ofproto; - entry->table.id = *table_id; - entry->table.match = true; + if (ofproto_try_ref(&ofproto->up)) { + entry = xlate_cache_add_entry(xcache, XC_TABLE); + entry->table.ofproto = ofproto; + entry->table.id = *table_id; + entry->table.match = true; + } } return rule; } @@ -4450,11 +4451,12 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, } if (xcache) { struct xc_entry *entry; - - entry = xlate_cache_add_entry(xcache, XC_TABLE); - entry->table.ofproto = ofproto; - entry->table.id = next_id; - entry->table.match = (rule != NULL); + if (ofproto_try_ref(&ofproto->up)) { + entry = xlate_cache_add_entry(xcache, XC_TABLE); + entry->table.ofproto = ofproto; + entry->table.id = next_id; + entry->table.match = (rule != NULL); + } } if (rule) { goto out; /* Match. */ diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index afecb24cb..34074fd62 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -139,6 +139,8 @@ struct ofproto { /* Variable length mf_field mapping. Stores all configured variable length * meta-flow fields (struct mf_field) in a switch. */ struct vl_mff_map vl_mff_map; + + struct ovs_refcount refcount; }; void ofproto_init_tables(struct ofproto *, int n_tables); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 59f06aa94..704ac4146 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -474,6 +474,24 @@ ofproto_bump_tables_version(struct ofproto *ofproto) ofproto->tables_version); } +void +ofproto_ref(struct ofproto *ofproto) +{ + ovs_refcount_ref(&ofproto->refcount); +} + +bool +ofproto_try_ref(struct ofproto *ofproto) +{ + return ovs_refcount_try_ref_rcu(&ofproto->refcount); +} + +void +ofproto_unref(struct ofproto *ofproto) +{ + ovs_refcount_unref(&ofproto->refcount); +} + int ofproto_create(const char *datapath_name, const char *datapath_type, struct ofproto **ofprotop) @@ -545,6 +563,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ovs_mutex_init(&ofproto->vl_mff_map.mutex); cmap_init(&ofproto->vl_mff_map.cmap); + ovs_refcount_init(&ofproto->refcount); error = ofproto->ofproto_class->construct(ofproto); if (error) { @@ -1696,14 +1715,24 @@ ofproto_destroy__(struct ofproto *ofproto) ofproto->ofproto_class->dealloc(ofproto); } -/* Destroying rules is doubly deferred, must have 'ofproto' around for them. - * - 1st we defer the removal of the rules from the classifier - * - 2nd we defer the actual destruction of the rules. */ +/* destroying a rule has a very long calling chains, and may have + * to wait multiple grace periods: + * + * for example: + * remove_rules_postponed -> remove_rule_rcu -> ofproto_rule_unref + * -> ovsrcu_postpone(rule_destroy_cb, rule) + * + * So we have to check if the ofproto refcount reaches to 1 for sure + * all the rules have been destoryed. + */ static void ofproto_destroy_defer__(struct ofproto *ofproto) OVS_EXCLUDED(ofproto_mutex) { - ovsrcu_postpone(ofproto_destroy__, ofproto); + if (ovs_refcount_read(&ofproto->refcount) != 1) { + ovsrcu_postpone(ofproto_destroy_defer__, ofproto); + } else + ovsrcu_postpone(ofproto_destroy__, ofproto); } void @@ -2927,6 +2956,7 @@ ofproto_rule_destroy__(struct rule *rule) cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr)); rule_actions_destroy(rule_get_actions(rule)); ovs_mutex_destroy(&rule->mutex); + ofproto_unref(rule->ofproto); rule->ofproto->ofproto_class->rule_dealloc(rule); } @@ -5265,6 +5295,7 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr, /* Initialize base state. */ *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto; + ofproto_ref(ofproto); cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), cr); ovs_refcount_init(&rule->ref_count); diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 2dd253167..3d3937b35 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -562,6 +562,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *, enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *, uint8_t table_id); +void ofproto_ref(struct ofproto *p); +void ofproto_unref(struct ofproto *p); +bool ofproto_try_ref(struct ofproto *p); + #ifdef __cplusplus } #endif -- 2.20.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev