On 30 Dec 2025, at 8:32, LIU Yulong wrote:
> Learn actions cache struct ofproto_flow_mod instances, but > ofproto_flow_mod_start() used to destroy their rule_criteria, > leaving later replays with dangling minimask pointers that > crashed in classifier_find_rule_exactly(). > This change adds a keep_flow_mod flag, clones the criteria > before the initial start, restores it afterward via new > clone/replace helpers, and lets normal ofproto_flow_mod_uninit() > free the clone. As a result, cached learn xcache > entries can safely replay without touching freed memory. > > The call trace: > *0 minimask_hash (basis=0, mask=0x7f05d7fd3f38) at > lib/classifier-private.h:325 > *1 find_subtable (cls=cls@entry=0x5598cf591588, mask=0x7f05d7fd3f38) at > lib/classifier.c:1431 > *2 0x00007f0770ab207d in classifier_find_rule_exactly > (cls=cls@entry=0x5598cf591588, > target=target@entry=0x7f064c696e30, version=18446744073709551615) at > lib/classifier.c:1184 > *3 0x00007f077110edb8 in collect_rules_strict > (ofproto=ofproto@entry=0x5598cf4f45c0, > criteria=criteria@entry=0x7f064c696e28, rules=rules@entry=0x7f064c696ec0) > at ofproto/ofproto.c:4524 > *4 0x00007f0771112c17 in modify_flow_start_strict (ofm=0x7f064c696e20, > ofproto=0x5598cf4f45c0) at ofproto/ofproto.c:5789 > *5 ofproto_flow_mod_start (ofproto=0x5598cf4f45c0, > ofm=ofm@entry=0x7f064c696e20) at ofproto/ofproto.c:7943 > *6 0x00007f0771112fd0 in ofproto_flow_mod_learn_start ( > ofm=ofm@entry=0x7f064c696e20) at ofproto/ofproto.c:5331 > *7 0x00007f0771114cad in ofproto_flow_mod_learn (ofm=0x7f064c696e20, > keep_ref=<optimized out>, limit=<optimized out>, below_limitp=0x0) at > ofproto/ofproto.c:5416 > *8 0x00007f0771141f73 in xlate_push_stats_entry (entry=0x7f0651563d10, > stats=0x0) at ofproto/ofproto-dpif-xlate-cache.c:127 > *9 0x00007f077114206d in ofpbuf_try_pull (size=40, > b=<synthetic pointer>) at include/openvswitch/ofpbuf.h:262 > *10 xlate_push_stats (xcache=<optimized out>, > stats=0x0) at ofproto/ofproto-dpif-xlate-cache.c:180 > *11 0x00007f077112e080 in push_dp_ops (udpif=0x5598cf4caf90, > ops=<optimized out>, n_ops=<optimized out>) at > ofproto/ofproto-dpif-upcall.c:2409 > *12 0x00007f077112e195 in push_ukey_ops (udpif=0x5598cf591588, umap=0x0, > ops=0x7f0669b1ffd0, n_ops=860610658) at ofproto/ofproto-dpif-upcall.c:2439 > *13 0x00005598cf4cb9a8 in ?? () > *14 0x00007f077112ef7d in revalidator_sweep__ (revalidator=<optimized out>, > purge=false) at ofproto/ofproto-dpif-upcall.c:2799 > *15 0x00007f07711323c5 in udpif_revalidator (arg=0x5598cf4dcf80) at > ofproto/ofproto-dpif-upcall.c:945 > *16 0x00007f0770b6715f in ovsthread_wrapper (aux_=<optimized out>) at > lib/ovs-thread.c:383 > *17 0x00007f076f922e65 in start_thread (arg=0x0) at pthread_create.c:282 > *18 0x00007f076ee4088d in __libc_ifunc_impl_list (name=<optimized out>, > array=0x7f0669b22700, max=<optimized out>) at > ../sysdeps/x86_64/multiarch/ifunc-impl-list.c:329 > > Signed-off-by: LIU Yulong <[email protected]> > --- Hi Yulong, Thanks for the patch, and sorry for the late reply. My first question would be whether you have a reproducer for this issue, and if so, could you add it as a unit test? > ofproto/ofproto-provider.h | 1 + > ofproto/ofproto.c | 45 ++++++++++++++++++++++++++++++++++++++ > 2 files changed, 46 insertions(+) > > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h > index 7df3f5246..be58dbed8 100644 > --- a/ofproto/ofproto-provider.h > +++ b/ofproto/ofproto-provider.h > @@ -2007,6 +2007,7 @@ struct ofproto_flow_mod { > bool modify_keep_counts; > enum nx_flow_update_event event; > uint8_t table_id; > + bool keep_flow_mod; /* Preserve criteria beyond start. */ > > /* These are only used during commit execution. > * ofproto_flow_mod_uninit() does NOT clean these up. */ > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c > index ec6d60a44..ae63b86ba 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -143,6 +143,10 @@ static void rule_criteria_init(struct rule_criteria *, > uint8_t table_id, > static void rule_criteria_require_rw(struct rule_criteria *, > bool can_write_readonly); > static void rule_criteria_destroy(struct rule_criteria *); > +static void rule_criteria_clone(struct rule_criteria *, > + const struct rule_criteria *); > +static void rule_criteria_replace(struct rule_criteria *, > + const struct rule_criteria *); > > static enum ofperr collect_rules_loose(struct ofproto *, > const struct rule_criteria *, > @@ -4600,6 +4604,31 @@ rules_mark_for_removal(struct ofproto *ofproto, struct > rule_collection *rules) > rule_collection_destroy(rules); > } > > +static void > +rule_criteria_clone(struct rule_criteria *dst, > + const struct rule_criteria *src) > +{ > + cls_rule_clone(&dst->cr, &src->cr); > + dst->table_id = src->table_id; > + dst->version = src->version; > + dst->cookie = src->cookie; > + dst->cookie_mask = src->cookie_mask; > + dst->out_port = src->out_port; > + dst->out_group = src->out_group; > + dst->include_hidden = src->include_hidden; > + dst->include_readonly = src->include_readonly; > +} > + > +static void > +rule_criteria_replace(struct rule_criteria *dst, > + const struct rule_criteria *src) > +{ > + if (dst->version != OVS_VERSION_NOT_REMOVED) { > + rule_criteria_destroy(dst); > + } > + rule_criteria_clone(dst, src); > +} > + > /* Schedules postponed removal of rules, destroys 'rules'. */ > static void > remove_rules_postponed(struct rule_collection *rules) > @@ -5605,6 +5634,14 @@ ofproto_flow_mod_learn_start(struct ofproto_flow_mod > *ofm) > OVS_REQUIRES(ofproto_mutex) > { > struct rule *rule = ofm->temp_rule; > + struct rule_criteria saved_criteria; > + bool saved = false; > + > + if (ofm->keep_flow_mod > + && ofm->criteria.version != OVS_VERSION_NOT_REMOVED) { > + rule_criteria_clone(&saved_criteria, &ofm->criteria); > + saved = true; > + } > > /* ofproto_flow_mod_start() consumes the reference, so we > * take a new one. */ > @@ -5612,6 +5649,11 @@ ofproto_flow_mod_learn_start(struct ofproto_flow_mod > *ofm) > enum ofperr error = ofproto_flow_mod_start(rule->ofproto, ofm); > ofm->temp_rule = rule; > > + if (saved) { > + rule_criteria_replace(&ofm->criteria, &saved_criteria); > + rule_criteria_destroy(&saved_criteria); > + } > + > return error; > } Not sure if you thought about it, but would it make more sense to use a non-copy approach for this? I drafted the following as an idea, but I haven’t tested it. diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c index c6d935cf0..ed046e58a 100644 --- a/ofproto/ofproto-dpif-xlate-cache.c +++ b/ofproto/ofproto-dpif-xlate-cache.c @@ -228,7 +228,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) mbridge_unref(entry->mirror.mbridge); break; case XC_LEARN: - ofproto_flow_mod_uninit(entry->learn.ofm); + ofproto_flow_mod_learn_uninit(entry->learn.ofm); free(entry->learn.ofm); break; case XC_NORMAL: diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 7df3f5246..32ec13988 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -2018,6 +2018,7 @@ struct ofproto_flow_mod { }; void ofproto_flow_mod_uninit(struct ofproto_flow_mod *); +void ofproto_flow_mod_learn_uninit(struct ofproto_flow_mod *); /* port_mod with execution context. */ struct ofproto_port_mod { diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index ec6d60a44..e6b319fac 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -269,6 +269,10 @@ static enum ofperr ofproto_flow_mod_init(struct ofproto *, const struct ofputil_flow_mod *fm, struct rule *) OVS_EXCLUDED(ofproto_mutex); +static enum ofperr ofproto_flow_mod_start__(struct ofproto *, + struct ofproto_flow_mod *, + bool free_resources) + OVS_REQUIRES(ofproto_mutex); static enum ofperr ofproto_flow_mod_start(struct ofproto *, struct ofproto_flow_mod *) OVS_REQUIRES(ofproto_mutex); @@ -5606,10 +5610,10 @@ ofproto_flow_mod_learn_start(struct ofproto_flow_mod *ofm) { struct rule *rule = ofm->temp_rule; - /* ofproto_flow_mod_start() consumes the reference, so we + /* ofproto_flow_mod_start__() consumes the reference, so we * take a new one. */ ofproto_rule_ref(rule); - enum ofperr error = ofproto_flow_mod_start(rule->ofproto, ofm); + enum ofperr error = ofproto_flow_mod_start__(rule->ofproto, ofm, false); ofm->temp_rule = rule; return error; @@ -6417,7 +6421,7 @@ handle_flow_mod__(struct ofproto *ofproto, const struct ofputil_flow_mod *fm, error = ofproto_flow_mod_start(ofproto, &ofm); if (!error) { ofproto_bump_tables_version(ofproto); - error = ofproto_flow_mod_finish(ofproto, &ofm, req); + error = ofproto_flow_mod_finish(ofproto, &ofm, req); ofmonitor_flush(ofproto->connmgr); } ovs_mutex_unlock(&ofproto_mutex); @@ -8206,6 +8210,12 @@ ofproto_flow_mod_uninit(struct ofproto_flow_mod *ofm) } } +void +ofproto_flow_mod_learn_uninit(struct ofproto_flow_mod *ofm) +{ + ofproto_flow_mod_uninit(ofm); +} + /* Initializes 'ofm' with 'ofproto', 'fm', and 'rule'. 'rule' may be null, but * if it is nonnull then the caller must own a reference to it, which on * success is transferred to 'ofm' and on failure is unreffed. */ @@ -8272,7 +8282,8 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, } static enum ofperr -ofproto_flow_mod_start(struct ofproto *ofproto, struct ofproto_flow_mod *ofm) +ofproto_flow_mod_start__(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, + bool free_resources) OVS_REQUIRES(ofproto_mutex) { enum ofperr error; @@ -8300,7 +8311,9 @@ ofproto_flow_mod_start(struct ofproto *ofproto, struct ofproto_flow_mod *ofm) OVS_NOT_REACHED(); } /* Release resources not needed after start. */ - ofproto_flow_mod_uninit(ofm); + if (!free_resources) { + ofproto_flow_mod_uninit(ofm); + } if (error) { rule_collection_destroy(&ofm->old_rules); @@ -8309,6 +8322,13 @@ ofproto_flow_mod_start(struct ofproto *ofproto, struct ofproto_flow_mod *ofm) return error; } +static enum ofperr +ofproto_flow_mod_start(struct ofproto *ofproto, struct ofproto_flow_mod *ofm) + OVS_REQUIRES(ofproto_mutex) +{ + return ofproto_flow_mod_start__(ofproto, ofm, true); +} + static void ofproto_flow_mod_revert(struct ofproto *ofproto, struct ofproto_flow_mod *ofm) OVS_REQUIRES(ofproto_mutex) > > @@ -5674,6 +5716,8 @@ ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, > bool keep_ref, > struct rule *rule = ofm->temp_rule; > bool below_limit = true; > > + ofm->keep_flow_mod = keep_ref; > + > /* Do we need to insert the rule? */ > if (!error && rule->state == RULE_INITIALIZED) { > ovs_mutex_lock(&ofproto_mutex); > @@ -8232,6 +8276,7 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct > ofproto_flow_mod *ofm, > ofm->conjs = NULL; > ofm->n_conjs = 0; > ofm->table_id = fm->table_id; > + ofm->keep_flow_mod = false; > > /* Initialize flag used by ofproto_dpif_xcache_execute(). */ > ofm->learn_adds_rule = false; > -- > 2.50.1 (Apple Git-155) > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
