Thanks for the patch! Tested-by: Vladislav Odintsov <[email protected]>
Regards, Vladislav Odintsov > On 22 Nov 2021, at 18:23, Ilya Maximets <[email protected]> wrote: > > While removing flows, removal itself is deferred, so classifier changes > performed already from the RCU thread. This way every deferred removal > triggers classifier change and reallocation of a pvector. Freeing of > old version of a pvector is postponed. Since all this is happening > from an RCU thread, all these copies of the same pvector will be freed > only after the next grace period. > > Below is the example output of the 'valgrind --tool=massif' from an OVN > deployment, where copies of that pvector took 5 GB of memory while > processing a bundled flow removal: > > ------------------------------------------------------------------- > n time(i) total(B) useful-heap(B) extra-heap(B) > ------------------------------------------------------------------- > 89 176,257,987,954 5,329,763,160 5,318,171,607 11,591,553 > 99.78% (5,318,171,607B) (heap allocation functions) malloc/new/new[] > ->98.45% (5,247,008,392B) xmalloc__ (util.c:137) > |->98.17% (5,232,137,408B) pvector_impl_dup (pvector.c:48) > ||->98.16% (5,231,472,896B) pvector_remove (pvector.c:159) > |||->98.16% (5,231,472,800B) destroy_subtable (classifier.c:1558) > ||||->98.16% (5,231,472,800B) classifier_remove (classifier.c:792) > |||| ->98.16% (5,231,472,800B) classifier_remove_assert (classifier.c:832) > |||| ->98.16% (5,231,472,800B) remove_rule_rcu__ (ofproto.c:2978) > |||| ->98.16% (5,231,472,800B) remove_rule_rcu (ofproto.c:2990) > |||| ->98.16% (5,231,472,800B) ovsrcu_call_postponed (ovs-rcu.c:346) > |||| ->98.16% (5,231,472,800B) ovsrcu_postpone_thread (ovs-rcu.c:362) > |||| ->98.16% (5,231,472,800B) ovsthread_wrapper > |||| ->98.16% (5,231,472,800B) start_thread > |||| ->98.16% (5,231,472,800B) clone > > Collecting all the flows to be removed and postponing removal for > all of them together to avoid the problem. This way all removals > will trigger only a single pvector re-allocation greatly reducing > the CPU and memory usage. > > Reported-by: Vladislav Odintsov <[email protected]> > Reported-at: > https://mail.openvswitch.org/pipermail/ovs-dev/2021-November/389538.html > Signed-off-by: Ilya Maximets <[email protected]> > --- > ofproto/ofproto-provider.h | 4 ++++ > ofproto/ofproto.c | 31 +++++++++++++++++++++++++++++-- > 2 files changed, 33 insertions(+), 2 deletions(-) > > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h > index 57c7d17cb..04c97102f 100644 > --- a/ofproto/ofproto-provider.h > +++ b/ofproto/ofproto-provider.h > @@ -66,6 +66,7 @@ struct bfd_cfg; > struct meter; > struct ofoperation; > struct ofproto_packet_out; > +struct rule_collection; > struct smap; > > extern struct ovs_mutex ofproto_mutex; > @@ -115,6 +116,9 @@ struct ofproto { > /* List of expirable flows, in all flow tables. */ > struct ovs_list expirable OVS_GUARDED_BY(ofproto_mutex); > > + /* List of flows to remove from flow tables. */ > + struct rule_collection *to_remove OVS_GUARDED_BY(ofproto_mutex); > + > /* Meter table. */ > struct ofputil_meter_features meter_features; > struct hmap meters; /* uint32_t indexed 'struct meter *'. */ > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c > index bd6103b1c..cbc376f63 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, > struct rule *) > OVS_REQUIRES(ofproto_mutex); > static void ofproto_rule_remove__(struct ofproto *, struct rule *) > OVS_REQUIRES(ofproto_mutex); > +static void remove_rules_postponed(struct rule_collection *) > + OVS_REQUIRES(ofproto_mutex); > > /* The source of an OpenFlow request. > * > @@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char > *datapath_type, > hindex_init(&ofproto->cookies); > hmap_init(&ofproto->learned_cookies); > ovs_list_init(&ofproto->expirable); > + ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove); > + rule_collection_init(ofproto->to_remove); > ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name); > ofproto->min_mtu = INT_MAX; > cmap_init(&ofproto->groups); > @@ -1631,6 +1635,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del) > } > ofproto_group_delete_all__(ofproto); > meter_delete_all(ofproto); > + remove_rules_postponed(ofproto->to_remove); > /* XXX: Concurrent handler threads may insert new learned flows based on > * learn actions of the now deleted flows right after we release > * 'ofproto_mutex'. */ > @@ -1682,6 +1687,11 @@ ofproto_destroy__(struct ofproto *ofproto) > ovs_assert(hmap_is_empty(&ofproto->learned_cookies)); > hmap_destroy(&ofproto->learned_cookies); > > + ovs_mutex_lock(&ofproto_mutex); > + rule_collection_destroy(ofproto->to_remove); > + free(ofproto->to_remove); > + ovs_mutex_unlock(&ofproto_mutex); > + > ofproto->ofproto_class->dealloc(ofproto); > } > > @@ -1878,6 +1888,9 @@ ofproto_run(struct ofproto *p) > > connmgr_run(p->connmgr, handle_openflow); > > + ovs_mutex_lock(&ofproto_mutex); > + remove_rules_postponed(p->to_remove); > + ovs_mutex_unlock(&ofproto_mutex); > return error; > } > > @@ -4437,6 +4450,20 @@ rule_criteria_destroy(struct rule_criteria *criteria) > criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */ > } > > +/* Adds rules to the 'to_remove' collection, so they can be destroyed > + * later all together. Destroys 'rules'. */ > +static void > +rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection > *rules) > + OVS_REQUIRES(ofproto_mutex) > +{ > + struct rule *rule; > + > + RULE_COLLECTION_FOR_EACH (rule, rules) { > + rule_collection_add(ofproto->to_remove, rule); > + } > + rule_collection_destroy(rules); > +} > + > /* Schedules postponed removal of rules, destroys 'rules'. */ > static void > remove_rules_postponed(struct rule_collection *rules) > @@ -5833,7 +5860,7 @@ modify_flows_finish(struct ofproto *ofproto, struct > ofproto_flow_mod *ofm, > } > } > learned_cookies_flush(ofproto, &dead_cookies); > - remove_rules_postponed(old_rules); > + rules_mark_for_removal(ofproto, old_rules); > } > > return error; > @@ -5941,7 +5968,7 @@ delete_flows_finish__(struct ofproto *ofproto, > learned_cookies_dec(ofproto, rule_get_actions(rule), > &dead_cookies); > } > - remove_rules_postponed(rules); > + rules_mark_for_removal(ofproto, rules); > > learned_cookies_flush(ofproto, &dead_cookies); > } > -- > 2.31.1 > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
