On 30 Dec 2025, at 8:32, LIU Yulong wrote:

> Learn actions cache struct ofproto_flow_mod instances, but
> ofproto_flow_mod_start() used to destroy their rule_criteria,
> leaving later replays with dangling minimask pointers that
> crashed in classifier_find_rule_exactly().
> This change adds a keep_flow_mod flag, clones the criteria
> before the initial start, restores it afterward via new
> clone/replace helpers, and lets normal ofproto_flow_mod_uninit()
> free the clone. As a result, cached learn xcache
> entries can safely replay without touching freed memory.
>
> The call trace:
> *0  minimask_hash (basis=0, mask=0x7f05d7fd3f38) at 
> lib/classifier-private.h:325
> *1  find_subtable (cls=cls@entry=0x5598cf591588, mask=0x7f05d7fd3f38) at 
> lib/classifier.c:1431
> *2  0x00007f0770ab207d in classifier_find_rule_exactly 
> (cls=cls@entry=0x5598cf591588,
>     target=target@entry=0x7f064c696e30, version=18446744073709551615) at 
> lib/classifier.c:1184
> *3  0x00007f077110edb8 in collect_rules_strict 
> (ofproto=ofproto@entry=0x5598cf4f45c0,
>     criteria=criteria@entry=0x7f064c696e28, rules=rules@entry=0x7f064c696ec0) 
> at ofproto/ofproto.c:4524
> *4  0x00007f0771112c17 in modify_flow_start_strict (ofm=0x7f064c696e20,
>     ofproto=0x5598cf4f45c0) at ofproto/ofproto.c:5789
> *5  ofproto_flow_mod_start (ofproto=0x5598cf4f45c0,
>     ofm=ofm@entry=0x7f064c696e20) at ofproto/ofproto.c:7943
> *6  0x00007f0771112fd0 in ofproto_flow_mod_learn_start (
>     ofm=ofm@entry=0x7f064c696e20) at ofproto/ofproto.c:5331
> *7  0x00007f0771114cad in ofproto_flow_mod_learn (ofm=0x7f064c696e20,
>     keep_ref=<optimized out>, limit=<optimized out>, below_limitp=0x0) at 
> ofproto/ofproto.c:5416
> *8  0x00007f0771141f73 in xlate_push_stats_entry (entry=0x7f0651563d10,
>     stats=0x0) at ofproto/ofproto-dpif-xlate-cache.c:127
> *9  0x00007f077114206d in ofpbuf_try_pull (size=40,
>     b=<synthetic pointer>) at include/openvswitch/ofpbuf.h:262
> *10 xlate_push_stats (xcache=<optimized out>,
>     stats=0x0) at ofproto/ofproto-dpif-xlate-cache.c:180
> *11 0x00007f077112e080 in push_dp_ops (udpif=0x5598cf4caf90,
>     ops=<optimized out>, n_ops=<optimized out>) at 
> ofproto/ofproto-dpif-upcall.c:2409
> *12 0x00007f077112e195 in push_ukey_ops (udpif=0x5598cf591588, umap=0x0,
>     ops=0x7f0669b1ffd0, n_ops=860610658) at ofproto/ofproto-dpif-upcall.c:2439
> *13 0x00005598cf4cb9a8 in ?? ()
> *14 0x00007f077112ef7d in revalidator_sweep__ (revalidator=<optimized out>,
>     purge=false) at ofproto/ofproto-dpif-upcall.c:2799
> *15 0x00007f07711323c5 in udpif_revalidator (arg=0x5598cf4dcf80) at 
> ofproto/ofproto-dpif-upcall.c:945
> *16 0x00007f0770b6715f in ovsthread_wrapper (aux_=<optimized out>) at 
> lib/ovs-thread.c:383
> *17 0x00007f076f922e65 in start_thread (arg=0x0) at pthread_create.c:282
> *18 0x00007f076ee4088d in __libc_ifunc_impl_list (name=<optimized out>,
>     array=0x7f0669b22700, max=<optimized out>) at 
> ../sysdeps/x86_64/multiarch/ifunc-impl-list.c:329
>
> Signed-off-by: LIU Yulong <[email protected]>
> ---

Hi Yulong,

Thanks for the patch, and sorry for the late reply. My first question would be 
whether you have a reproducer for this issue, and if so, could you add it as a 
unit test?

>  ofproto/ofproto-provider.h |  1 +
>  ofproto/ofproto.c          | 45 ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+)
>
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 7df3f5246..be58dbed8 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -2007,6 +2007,7 @@ struct ofproto_flow_mod {
>      bool modify_keep_counts;
>      enum nx_flow_update_event event;
>      uint8_t table_id;
> +    bool keep_flow_mod;              /* Preserve criteria beyond start. */
>
>      /* These are only used during commit execution.
>       * ofproto_flow_mod_uninit() does NOT clean these up. */
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index ec6d60a44..ae63b86ba 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -143,6 +143,10 @@ static void rule_criteria_init(struct rule_criteria *, 
> uint8_t table_id,
>  static void rule_criteria_require_rw(struct rule_criteria *,
>                                       bool can_write_readonly);
>  static void rule_criteria_destroy(struct rule_criteria *);
> +static void rule_criteria_clone(struct rule_criteria *,
> +                                const struct rule_criteria *);
> +static void rule_criteria_replace(struct rule_criteria *,
> +                                  const struct rule_criteria *);
>
>  static enum ofperr collect_rules_loose(struct ofproto *,
>                                         const struct rule_criteria *,
> @@ -4600,6 +4604,31 @@ rules_mark_for_removal(struct ofproto *ofproto, struct 
> rule_collection *rules)
>      rule_collection_destroy(rules);
>  }
>
> +static void
> +rule_criteria_clone(struct rule_criteria *dst,
> +                    const struct rule_criteria *src)
> +{
> +    cls_rule_clone(&dst->cr, &src->cr);
> +    dst->table_id = src->table_id;
> +    dst->version = src->version;
> +    dst->cookie = src->cookie;
> +    dst->cookie_mask = src->cookie_mask;
> +    dst->out_port = src->out_port;
> +    dst->out_group = src->out_group;
> +    dst->include_hidden = src->include_hidden;
> +    dst->include_readonly = src->include_readonly;
> +}
> +
> +static void
> +rule_criteria_replace(struct rule_criteria *dst,
> +                      const struct rule_criteria *src)
> +{
> +    if (dst->version != OVS_VERSION_NOT_REMOVED) {
> +        rule_criteria_destroy(dst);
> +    }
> +    rule_criteria_clone(dst, src);
> +}
> +
>  /* Schedules postponed removal of rules, destroys 'rules'. */
>  static void
>  remove_rules_postponed(struct rule_collection *rules)
> @@ -5605,6 +5634,14 @@ ofproto_flow_mod_learn_start(struct ofproto_flow_mod 
> *ofm)
>      OVS_REQUIRES(ofproto_mutex)
>  {
>      struct rule *rule = ofm->temp_rule;
> +    struct rule_criteria saved_criteria;
> +    bool saved = false;
> +
> +    if (ofm->keep_flow_mod
> +        && ofm->criteria.version != OVS_VERSION_NOT_REMOVED) {
> +        rule_criteria_clone(&saved_criteria, &ofm->criteria);
> +        saved = true;
> +    }
>
>      /* ofproto_flow_mod_start() consumes the reference, so we
>       * take a new one. */
> @@ -5612,6 +5649,11 @@ ofproto_flow_mod_learn_start(struct ofproto_flow_mod 
> *ofm)
>      enum ofperr error = ofproto_flow_mod_start(rule->ofproto, ofm);
>      ofm->temp_rule = rule;
>
> +    if (saved) {
> +        rule_criteria_replace(&ofm->criteria, &saved_criteria);
> +        rule_criteria_destroy(&saved_criteria);
> +    }
> +
>      return error;
>  }

Not sure if you thought about it, but would it make more sense to use a 
non-copy approach for this?
I drafted the following as an idea, but I haven’t tested it.

diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
b/ofproto/ofproto-dpif-xlate-cache.c
index c6d935cf0..ed046e58a 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -228,7 +228,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
         mbridge_unref(entry->mirror.mbridge);
         break;
     case XC_LEARN:
-        ofproto_flow_mod_uninit(entry->learn.ofm);
+        ofproto_flow_mod_learn_uninit(entry->learn.ofm);
         free(entry->learn.ofm);
         break;
     case XC_NORMAL:
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 7df3f5246..32ec13988 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -2018,6 +2018,7 @@ struct ofproto_flow_mod {
 };

 void ofproto_flow_mod_uninit(struct ofproto_flow_mod *);
+void ofproto_flow_mod_learn_uninit(struct ofproto_flow_mod *);

 /* port_mod with execution context. */
 struct ofproto_port_mod {
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index ec6d60a44..e6b319fac 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -269,6 +269,10 @@ static enum ofperr ofproto_flow_mod_init(struct ofproto *,
                                          const struct ofputil_flow_mod *fm,
                                          struct rule *)
     OVS_EXCLUDED(ofproto_mutex);
+static enum ofperr ofproto_flow_mod_start__(struct ofproto *,
+                                            struct ofproto_flow_mod *,
+                                            bool free_resources)
+    OVS_REQUIRES(ofproto_mutex);
 static enum ofperr ofproto_flow_mod_start(struct ofproto *,
                                           struct ofproto_flow_mod *)
     OVS_REQUIRES(ofproto_mutex);
@@ -5606,10 +5610,10 @@ ofproto_flow_mod_learn_start(struct ofproto_flow_mod 
*ofm)
 {
     struct rule *rule = ofm->temp_rule;

-    /* ofproto_flow_mod_start() consumes the reference, so we
+    /* ofproto_flow_mod_start__() consumes the reference, so we
      * take a new one. */
     ofproto_rule_ref(rule);
-    enum ofperr error = ofproto_flow_mod_start(rule->ofproto, ofm);
+    enum ofperr error = ofproto_flow_mod_start__(rule->ofproto, ofm, false);
     ofm->temp_rule = rule;

     return error;
@@ -6417,7 +6421,7 @@ handle_flow_mod__(struct ofproto *ofproto, const struct 
ofputil_flow_mod *fm,
     error = ofproto_flow_mod_start(ofproto, &ofm);
     if (!error) {
         ofproto_bump_tables_version(ofproto);
-        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
+        error = ofproto_flow_mod_finish(ofproto, &ofm, req);
         ofmonitor_flush(ofproto->connmgr);
     }
     ovs_mutex_unlock(&ofproto_mutex);
@@ -8206,6 +8210,12 @@ ofproto_flow_mod_uninit(struct ofproto_flow_mod *ofm)
     }
 }

+void
+ofproto_flow_mod_learn_uninit(struct ofproto_flow_mod *ofm)
+{
+    ofproto_flow_mod_uninit(ofm);
+}
+
 /* Initializes 'ofm' with 'ofproto', 'fm', and 'rule'.  'rule' may be null, but
  * if it is nonnull then the caller must own a reference to it, which on
  * success is transferred to 'ofm' and on failure is unreffed. */
@@ -8272,7 +8282,8 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct 
ofproto_flow_mod *ofm,
 }

 static enum ofperr
-ofproto_flow_mod_start(struct ofproto *ofproto, struct ofproto_flow_mod *ofm)
+ofproto_flow_mod_start__(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
+                         bool free_resources)
     OVS_REQUIRES(ofproto_mutex)
 {
     enum ofperr error;
@@ -8300,7 +8311,9 @@ ofproto_flow_mod_start(struct ofproto *ofproto, struct 
ofproto_flow_mod *ofm)
         OVS_NOT_REACHED();
     }
     /* Release resources not needed after start. */
-    ofproto_flow_mod_uninit(ofm);
+    if (!free_resources) {
+        ofproto_flow_mod_uninit(ofm);
+    }

     if (error) {
         rule_collection_destroy(&ofm->old_rules);
@@ -8309,6 +8322,13 @@ ofproto_flow_mod_start(struct ofproto *ofproto, struct 
ofproto_flow_mod *ofm)
     return error;
 }

+static enum ofperr
+ofproto_flow_mod_start(struct ofproto *ofproto, struct ofproto_flow_mod *ofm)
+    OVS_REQUIRES(ofproto_mutex)
+{
+    return ofproto_flow_mod_start__(ofproto, ofm, true);
+}
+
 static void
 ofproto_flow_mod_revert(struct ofproto *ofproto, struct ofproto_flow_mod *ofm)
     OVS_REQUIRES(ofproto_mutex)


>
> @@ -5674,6 +5716,8 @@ ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, 
> bool keep_ref,
>      struct rule *rule = ofm->temp_rule;
>      bool below_limit = true;
>
> +    ofm->keep_flow_mod = keep_ref;
> +
>      /* Do we need to insert the rule? */
>      if (!error && rule->state == RULE_INITIALIZED) {
>          ovs_mutex_lock(&ofproto_mutex);
> @@ -8232,6 +8276,7 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct 
> ofproto_flow_mod *ofm,
>      ofm->conjs = NULL;
>      ofm->n_conjs = 0;
>      ofm->table_id = fm->table_id;
> +    ofm->keep_flow_mod = false;
>
>      /* Initialize flag used by ofproto_dpif_xcache_execute(). */
>      ofm->learn_adds_rule = false;
> -- 
> 2.50.1 (Apple Git-155)
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to