On Tue, Nov 30, 2021 at 12:07 PM Dumitru Ceara <[email protected]> wrote: > > On 11/30/21 17:54, Numan Siddique wrote: > > On Tue, Nov 23, 2021 at 2:49 PM Dumitru Ceara <[email protected]> wrote: > >> > >> By default perform memory trimming 30 seconds after the lflow cache > >> utilization has dropped. Otherwise, idle ovn-controller processes might > >> wastefully fail to return unused memory to the system. The timeout is > >> configurable through the 'ovn-trim-timeout-ms' external_id. > >> > >> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2024768 > >> Signed-off-by: Dumitru Ceara <[email protected]> > > > > Thanks for adding this support. The patch and approach LGTM. > > > > I just have one question. Please see below. > > Thanks for the review! > > > > > Thanks > > Numan > > > >> --- > >> NEWS | 3 ++ > >> controller/chassis.c | 16 ++++++++ > >> controller/lflow-cache.c | 67 ++++++++++++++++++++++++++++++++- > >> controller/lflow-cache.h | 5 ++- > >> controller/ovn-controller.8.xml | 8 ++++ > >> controller/ovn-controller.c | 9 ++++- > >> controller/test-lflow-cache.c | 13 +++++-- > >> 7 files changed, 114 insertions(+), 7 deletions(-) > >> > >> diff --git a/NEWS b/NEWS > >> index e27aaad06..9880a678e 100644 > >> --- a/NEWS > >> +++ b/NEWS > >> @@ -12,6 +12,9 @@ Post v21.09.0 > >> running on SmartNIC control plane CPUs. Please refer to > >> Documentation/topics/vif-plug-providers/vif-plug-providers.rst for > >> more > >> information. > >> + - ovn-controller: Add configuration knob, through OVS external-id > >> + "ovn-trim-timeout-ms" to allow specifiying an lflow cache inactivity > >> + timeout after which ovn-controller should trim memory. > >> > >> OVN v21.09.0 - 01 Oct 2021 > >> -------------------------- > >> diff --git a/controller/chassis.c b/controller/chassis.c > >> index c11c10a34..8a1559653 100644 > >> --- a/controller/chassis.c > >> +++ b/controller/chassis.c > >> @@ -54,6 +54,7 @@ struct ovs_chassis_cfg { > >> const char *memlimit_lflow_cache; > >> const char *trim_limit_lflow_cache; > >> const char *trim_wmark_perc_lflow_cache; > >> + const char *trim_timeout_ms; > >> > >> /* Set of encap types parsed from the 'ovn-encap-type' external-id. */ > >> struct sset encap_type_set; > >> @@ -163,6 +164,12 @@ get_trim_wmark_perc_lflow_cache(const struct smap > >> *ext_ids) > >> return smap_get_def(ext_ids, "ovn-trim-wmark-perc-lflow-cache", ""); > >> } > >> > >> +static const char * > >> +get_trim_timeout(const struct smap *ext_ids) > >> +{ > >> + return smap_get_def(ext_ids, "ovn-trim-timeout-ms", ""); > >> +} > >> + > >> static const char * > >> get_encap_csum(const struct smap *ext_ids) > >> { > >> @@ -292,6 +299,7 @@ chassis_parse_ovs_config(const struct > >> ovsrec_open_vswitch_table *ovs_table, > >> get_trim_limit_lflow_cache(&cfg->external_ids); > >> ovs_cfg->trim_wmark_perc_lflow_cache = > >> get_trim_wmark_perc_lflow_cache(&cfg->external_ids); > >> + ovs_cfg->trim_timeout_ms = get_trim_timeout(&cfg->external_ids); > >> > >> if (!chassis_parse_ovs_encap_type(encap_type, > >> &ovs_cfg->encap_type_set)) { > >> return false; > >> @@ -336,6 +344,7 @@ chassis_build_other_config(const struct > >> ovs_chassis_cfg *ovs_cfg, > >> ovs_cfg->trim_limit_lflow_cache); > >> smap_replace(config, "ovn-trim-wmark-perc-lflow-cache", > >> ovs_cfg->trim_wmark_perc_lflow_cache); > >> + smap_replace(config, "ovn-trim-timeout-ms", ovs_cfg->trim_timeout_ms); > >> smap_replace(config, "iface-types", > >> ds_cstr_ro(&ovs_cfg->iface_types)); > >> smap_replace(config, "ovn-chassis-mac-mappings", > >> ovs_cfg->chassis_macs); > >> smap_replace(config, "is-interconn", > >> @@ -415,6 +424,13 @@ chassis_other_config_changed(const struct > >> ovs_chassis_cfg *ovs_cfg, > >> return true; > >> } > >> > >> + const char *chassis_trim_timeout_ms = > >> + get_trim_timeout(&chassis_rec->other_config); > >> + > >> + if (strcmp(ovs_cfg->trim_timeout_ms, chassis_trim_timeout_ms)) { > >> + return true; > >> + } > >> + > >> const char *chassis_mac_mappings = > >> get_chassis_mac_mappings(&chassis_rec->other_config); > >> if (strcmp(ovs_cfg->chassis_macs, chassis_mac_mappings)) { > >> diff --git a/controller/lflow-cache.c b/controller/lflow-cache.c > >> index 6db85fc12..9c3db06e7 100644 > >> --- a/controller/lflow-cache.c > >> +++ b/controller/lflow-cache.c > >> @@ -24,8 +24,10 @@ > >> #include "coverage.h" > >> #include "lflow-cache.h" > >> #include "lib/uuid.h" > >> +#include "openvswitch/poll-loop.h" > >> #include "openvswitch/vlog.h" > >> #include "ovn/expr.h" > >> +#include "timeval.h" > >> > >> VLOG_DEFINE_THIS_MODULE(lflow_cache); > >> > >> @@ -59,7 +61,10 @@ struct lflow_cache { > >> uint64_t max_mem_usage; > >> uint32_t trim_limit; > >> uint32_t trim_wmark_perc; > >> + uint32_t trim_timeout_ms; > >> uint64_t trim_count; > >> + long long int last_active_ms; > >> + bool recently_active; > >> bool enabled; > >> }; > >> > >> @@ -79,6 +84,7 @@ static struct lflow_cache_value *lflow_cache_add__( > >> static void lflow_cache_delete__(struct lflow_cache *lc, > >> struct lflow_cache_entry *lce); > >> static void lflow_cache_trim__(struct lflow_cache *lc, bool force); > >> +static void lflow_cache_record_activity__(struct lflow_cache *lc); > >> > >> struct lflow_cache * > >> lflow_cache_create(void) > >> @@ -128,7 +134,7 @@ lflow_cache_destroy(struct lflow_cache *lc) > >> void > >> lflow_cache_enable(struct lflow_cache *lc, bool enabled, uint32_t > >> capacity, > >> uint64_t max_mem_usage_kb, uint32_t lflow_trim_limit, > >> - uint32_t trim_wmark_perc) > >> + uint32_t trim_wmark_perc, uint32_t trim_timeout_ms) > >> { > >> if (!lc) { > >> return; > >> @@ -141,6 +147,13 @@ lflow_cache_enable(struct lflow_cache *lc, bool > >> enabled, uint32_t capacity, > >> trim_wmark_perc = 100; > >> } > >> > >> + if (trim_timeout_ms < 1000) { > >> + VLOG_WARN_RL(&rl, "Invalid requested trim timeout: " > >> + "requested %"PRIu32" ms, using 1000 ms instead", > >> + trim_timeout_ms); > >> + trim_timeout_ms = 1000; > >> + } > >> + > >> uint64_t max_mem_usage = max_mem_usage_kb * 1024; > >> bool need_flush = false; > >> bool need_trim = false; > >> @@ -160,10 +173,13 @@ lflow_cache_enable(struct lflow_cache *lc, bool > >> enabled, uint32_t capacity, > >> lc->max_mem_usage = max_mem_usage; > >> lc->trim_limit = lflow_trim_limit; > >> lc->trim_wmark_perc = trim_wmark_perc; > >> + lc->trim_timeout_ms = trim_timeout_ms; > >> > >> if (need_flush) { > >> + lflow_cache_record_activity__(lc); > >> lflow_cache_flush(lc); > >> } else if (need_trim) { > >> + lflow_cache_record_activity__(lc); > >> lflow_cache_trim__(lc, false); > >> } > >> } > >> @@ -271,6 +287,7 @@ lflow_cache_delete(struct lflow_cache *lc, const > >> struct uuid *lflow_uuid) > >> lflow_cache_delete__(lc, CONTAINER_OF(lcv, struct > >> lflow_cache_entry, > >> value)); > >> lflow_cache_trim__(lc, false); > >> + lflow_cache_record_activity__(lc); > >> } > >> } > >> > >> @@ -308,6 +325,46 @@ lflow_cache_get_memory_usage(const struct lflow_cache > >> *lc, struct simap *usage) > >> ROUND_UP(lc->mem_usage, 1024) / 1024); > >> } > >> > >> +void > >> +lflow_cache_run(struct lflow_cache *lc) > >> +{ > >> + if (!lc->recently_active) { > >> + return; > >> + } > >> + > >> + long long int now = time_msec(); > >> + if (now < lc->last_active_ms || now < lc->trim_timeout_ms) { > >> + VLOG_WARN_RL(&rl, "Detected cache last active timestamp > >> overflow"); > >> + lc->recently_active = false; > >> + lflow_cache_trim__(lc, true); > >> + return; > >> + } > >> + > >> + if (now < lc->trim_timeout_ms) { > >> + VLOG_WARN_RL(&rl, "Detected very large trim timeout: " > >> + "now %lld ms timeout %"PRIu32" ms", > >> + now, lc->trim_timeout_ms); > >> + return; > >> + } > >> + > >> + if (now - lc->trim_timeout_ms >= lc->last_active_ms) { > >> + VLOG_INFO_RL(&rl, "Detected cache inactivity " > >> + "(last active %lld ms ago): trimming cache", > >> + now - lc->last_active_ms); > >> + lflow_cache_trim__(lc, true); > >> + lc->recently_active = false; > >> + } > >> +} > >> + > >> +void > >> +lflow_cache_wait(struct lflow_cache *lc) > >> +{ > >> + if (!lc->recently_active) { > >> + return; > >> + } > >> + poll_timer_wait_until(lc->last_active_ms + lc->trim_timeout_ms); > >> +} > >> + > >> static struct lflow_cache_value * > >> lflow_cache_add__(struct lflow_cache *lc, const struct uuid *lflow_uuid, > >> enum lflow_cache_type type, uint64_t value_size) > >> @@ -332,6 +389,7 @@ lflow_cache_add__(struct lflow_cache *lc, const struct > >> uuid *lflow_uuid, > >> } > >> } > >> > >> + lflow_cache_record_activity__(lc); > > > > Do we need to record activity when a cache entry is added ? If a > > logical flow expr is cached, > > we probably don't need to trim the memory right as we probably are not > > freeing any memory in cache? > > It may be sufficient to trim the memory when an entry is deleted. > > > > Although it's possible that processing a logical flow and adding it to > > the cache may not free any memory in cache, > > but ovn-controller / IDL code may have freed the memory elsewhere. So > > there could be benefits in trimming. > > I'm just wondering if it would be an overkill ? What do you think ? > > What I was trying to achieve was something like "if there are no recent > changes to the cache state" then try to trim the cache (and memory). > > The main heuristic being that if there was a recent change then the > chance that something changes in the near future is bigger so we might > as well delay trimming for a bit. It's true that there might be long > periods of churn but in that case we also have the trim watermark > percentage to trigger trimming when the actual used cache size decreases. > > If we don't record activity when a cache entry is added then we'll > actually end up trimming memory more often than we do with the proposed > approach.
Thanks for the detailed explanation. Makes sense. I applied the patch to the main branch. Numan > > > > > Thanks > > Numan > > > > Thanks, > Dumitru > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
