Hi Paolo,

On 14/01/2021 13:19, Paolo Valerio wrote:
> adds "emc entries" to "ovs-appctl dpif-netdev/pmd-stats-show" in order
> to show the number of alive entries.
>
Thanks for working on this. Not a full review, but a few high level comments.

I would like to ask about the motivation - is it so a user can judge the
effectiveness of using the EMC to decide to disable it? Or to tune the EMC
insertion rate?

If I run this and increase the flows, I see:

  emc hits: 1961314
  emc entries: 6032 (73.63%)
  smc hits: 0
  megaflow hits: 688946

If I look at pmd-perf-stats (below), it shows the % split between emc and
megaflow, and I think I would use that to judge the effectiveness of the emc.
I'm not too fussed whether the emc occupancy is high or low; I just want to
know if it is being effective at getting hits for my pkts, so I'm not sure
that 'emc entries: 6032 (73.63%)' helps me decide to enable/disable it.

  - EMC hits: 1972766 ( 74.0 %)
  - SMC hits: 0 ( 0.0 %)
  - Megaflow hits: 693022 ( 26.0 %, 1.00 subtbl lookups/hit)

Of course it doesn't account for the differing cost between them, but if emc
hits were a low %, I'd want to experiment with disabling it.
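(FWIW, the 73.63% is consistent with the 8192-entry EMC:
6032 / 8192 = 73.63%.)

To make that concrete, here is a toy standalone sketch of the judgement I'd
be making from the hit split rather than from occupancy. The function name
and the 25% threshold are invented for illustration; this is not OVS code:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Judge the EMC purely by its share of total cache hits.  A real
   * heuristic would also have to weigh the per-hit cost differences
   * between EMC, SMC and megaflow lookups. */
  static bool
  emc_looks_effective(uint64_t emc_hits, uint64_t smc_hits, uint64_t mf_hits)
  {
      uint64_t total = emc_hits + smc_hits + mf_hits;

      return total && 100.0 * emc_hits / total >= 25.0;
  }

  int
  main(void)
  {
      /* Hit counts from the pmd-stats-show output above. */
      printf("%s\n", emc_looks_effective(1961314, 0, 688946)
                     ? "keep emc" : "try disabling emc");
      return 0;
  }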
Depending on your motivation, you might also want to take a look at the
really nice fdb stats that Eelco did; it might give some ideas.

Kevin.

> Signed-off-by: Paolo Valerio <[email protected]>
> ---
>  NEWS                   |  2 ++
>  lib/dpif-netdev-perf.h |  2 ++
>  lib/dpif-netdev.c      | 76 +++++++++++++++++++++++++++++++-----------
>  tests/pmd.at           |  6 ++--
>  4 files changed, 65 insertions(+), 21 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 617fe8e6a..e5d53a83b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -20,6 +20,8 @@ Post-v2.14.0
>       * Add generic IP protocol support to conntrack. With this change, all
>         none UDP, TCP, and ICMP traffic will be treated as general L3
>         traffic, i.e. using 3 tupples.
> +     * EMC alive entries counter has been added to command
> +       "ovs-appctl dpif-netdev/pmd-stats-show"
>     - The environment variable OVS_UNBOUND_CONF, if set, is now used
>       as the DNS resolver's (unbound) configuration file.
>   - Linux datapath:
> diff --git a/lib/dpif-netdev-perf.h b/lib/dpif-netdev-perf.h
> index 72645b6b3..80f50eae9 100644
> --- a/lib/dpif-netdev-perf.h
> +++ b/lib/dpif-netdev-perf.h
> @@ -157,6 +157,8 @@ struct pmd_perf_stats {
>      uint64_t last_tsc;
>      /* Used to space certain checks in time. */
>      uint64_t next_check_tsc;
> +    /* Exact Match Cache used entries counter. */
> +    atomic_uint32_t emc_n_entries;
>      /* If non-NULL, outermost cycle timer currently running in PMD. */
>      struct cycle_timer *cur_timer;
>      /* Set of PMD counters with their zero offsets. */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 300861ca5..3eb70ccd5 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -913,7 +913,7 @@ dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
>                                                        odp_port_t in_port);
>
>  static inline bool emc_entry_alive(struct emc_entry *ce);
> -static void emc_clear_entry(struct emc_entry *ce);
> +static void emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s);
>  static void smc_clear_entry(struct smc_bucket *b, int idx);
>
>  static void dp_netdev_request_reconfigure(struct dp_netdev *dp);
> @@ -955,13 +955,16 @@ dfc_cache_init(struct dfc_cache *flow_cache)
>  }
>
>  static void
> -emc_cache_uninit(struct emc_cache *flow_cache)
> +emc_cache_uninit(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
>  {
>      int i;
>
> +
>      for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
> -        emc_clear_entry(&flow_cache->entries[i]);
> +        emc_clear_entry(&flow_cache->entries[i], s);
>      }
> +
> +    atomic_store_relaxed(&s->emc_n_entries, 0);
>  }
>
>  static void
> @@ -977,21 +980,21 @@ smc_cache_uninit(struct smc_cache *smc)
>  }
>
>  static void
> -dfc_cache_uninit(struct dfc_cache *flow_cache)
> +dfc_cache_uninit(struct dfc_cache *flow_cache, struct pmd_perf_stats *s)
>  {
>      smc_cache_uninit(&flow_cache->smc_cache);
> -    emc_cache_uninit(&flow_cache->emc_cache);
> +    emc_cache_uninit(&flow_cache->emc_cache, s);
>  }
>
>  /* Check and clear dead flow references slowly (one entry at each
>   * invocation). */
>  static void
> -emc_cache_slow_sweep(struct emc_cache *flow_cache)
> +emc_cache_slow_sweep(struct emc_cache *flow_cache, struct pmd_perf_stats *s)
>  {
>      struct emc_entry *entry = &flow_cache->entries[flow_cache->sweep_idx];
>
>      if (!emc_entry_alive(entry)) {
> -        emc_clear_entry(entry);
> +        emc_clear_entry(entry, s);
>      }
>      flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
>  }
> @@ -1093,15 +1096,26 @@ pmd_info_show_stats(struct ds *reply,
>                    "  packets received: %"PRIu64"\n"
>                    "  packet recirculations: %"PRIu64"\n"
>                    "  avg. datapath passes per packet: %.02f\n"
> -                  "  emc hits: %"PRIu64"\n"
> +                  "  emc hits: %"PRIu64"\n",
> +                  total_packets, stats[PMD_STAT_RECIRC],
> +                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT]
> +                  );
> +
> +    if (pmd->core_id != NON_PMD_CORE_ID) {
> +        uint32_t emc_entries;
> +        atomic_read_relaxed(&pmd->perf_stats.emc_n_entries, &emc_entries);
> +        ds_put_format(reply, "  emc entries: %"PRIu32" (%.02f%%)\n",
> +                      emc_entries,
> +                      100.0 * emc_entries / EM_FLOW_HASH_ENTRIES);
> +    }
> +
> +    ds_put_format(reply,
>                    "  smc hits: %"PRIu64"\n"
>                    "  megaflow hits: %"PRIu64"\n"
>                    "  avg. subtable lookups per megaflow hit: %.02f\n"
>                    "  miss with success upcall: %"PRIu64"\n"
>                    "  miss with failed upcall: %"PRIu64"\n"
>                    "  avg. packets per output batch: %.02f\n",
> -                  total_packets, stats[PMD_STAT_RECIRC],
> -                  passes_per_pkt, stats[PMD_STAT_EXACT_HIT],
>                    stats[PMD_STAT_SMC_HIT],
>                    stats[PMD_STAT_MASKED_HIT], lookups_per_hit,
>                    stats[PMD_STAT_MISS], stats[PMD_STAT_LOST],
> @@ -2004,6 +2018,26 @@ non_atomic_ullong_add(atomic_ullong *var, unsigned long long n)
>      atomic_store_relaxed(var, tmp);
>  }
>
> +static void
> +non_atomic_dec(atomic_uint32_t *var)
> +{
> +    unsigned int tmp;
> +
> +    atomic_read_relaxed(var, &tmp);
> +    tmp--;
> +    atomic_store_relaxed(var, tmp);
> +}
> +
> +static void
> +non_atomic_inc(atomic_uint32_t *var)
> +{
> +    unsigned int tmp;
> +
> +    atomic_read_relaxed(var, &tmp);
> +    tmp++;
> +    atomic_store_relaxed(var, tmp);
> +}
> +
>  static int
>  dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
>  {
> @@ -3070,25 +3104,28 @@ emc_entry_alive(struct emc_entry *ce)
>  }
>
>  static void
> -emc_clear_entry(struct emc_entry *ce)
> +emc_clear_entry(struct emc_entry *ce, struct pmd_perf_stats *s)
>  {
>      if (ce->flow) {
>          dp_netdev_flow_unref(ce->flow);
>          ce->flow = NULL;
> +        non_atomic_dec(&s->emc_n_entries);
>      }
>  }
>
>  static inline void
>  emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
> -                 const struct netdev_flow_key *key)
> +                 const struct netdev_flow_key *key, struct pmd_perf_stats *s)
>  {
>      if (ce->flow != flow) {
>          if (ce->flow) {
>              dp_netdev_flow_unref(ce->flow);
> +            non_atomic_dec(&s->emc_n_entries);
>          }
>
>          if (dp_netdev_flow_ref(flow)) {
>              ce->flow = flow;
> +            non_atomic_inc(&s->emc_n_entries);
>          } else {
>              ce->flow = NULL;
>          }
> @@ -3100,7 +3137,7 @@ emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
>
>  static inline void
>  emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
> -           struct dp_netdev_flow *flow)
> +           struct dp_netdev_flow *flow, struct pmd_perf_stats *s)
>  {
>      struct emc_entry *to_be_replaced = NULL;
>      struct emc_entry *current_entry;
>
> @@ -3108,7 +3145,7 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
>      EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
>          if (netdev_flow_key_equal(&current_entry->key, key)) {
>              /* We found the entry with the 'mf' miniflow */
> -            emc_change_entry(current_entry, flow, NULL);
> +            emc_change_entry(current_entry, flow, NULL, s);
>              return;
>          }
>
> @@ -3124,7 +3161,7 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
>      /* We didn't find the miniflow in the cache.
>       * The 'to_be_replaced' entry is where the new flow will be stored */
>
> -    emc_change_entry(to_be_replaced, flow, key);
> +    emc_change_entry(to_be_replaced, flow, key, s);
>  }
>
>  static inline void
> @@ -3139,7 +3176,7 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
>      uint32_t min = pmd->ctx.emc_insert_min;
>
>      if (min && random_uint32() <= min) {
> -        emc_insert(&(pmd->flow_cache).emc_cache, key, flow);
> +        emc_insert(&(pmd->flow_cache).emc_cache, key, flow,
> +                   &pmd->perf_stats);
>      }
>  }
>
> @@ -6014,7 +6051,8 @@ reload:
>          coverage_try_clear();
>          dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
>          if (!ovsrcu_try_quiesce()) {
> -            emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
> +            emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache),
> +                                 &pmd->perf_stats);
>              pmd->next_rcu_quiesce =
>                  pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
>          }
> @@ -6058,7 +6096,7 @@ reload:
>      }
>
>      pmd_free_static_tx_qid(pmd);
> -    dfc_cache_uninit(&pmd->flow_cache);
> +    dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
>      free(poll_list);
>      pmd_free_cached_ports(pmd);
>      return NULL;
> @@ -6537,7 +6575,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
>       * but extra cleanup is necessary */
>      if (pmd->core_id == NON_PMD_CORE_ID) {
>          ovs_mutex_lock(&dp->non_pmd_mutex);
> -        dfc_cache_uninit(&pmd->flow_cache);
> +        dfc_cache_uninit(&pmd->flow_cache, &pmd->perf_stats);
>          pmd_free_cached_ports(pmd);
>          pmd_free_static_tx_qid(pmd);
>          ovs_mutex_unlock(&dp->non_pmd_mutex);
> diff --git a/tests/pmd.at b/tests/pmd.at
> index cc5371d5a..45c69563c 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -202,12 +202,13 @@ dummy@ovs-dummy: hit:0 missed:0
>    p0 7/1: (dummy-pmd: configured_rx_queues=4, configured_tx_queues=<cleared>, requested_rx_queues=4, requested_tx_queues=<cleared>)
>  ])
>
> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
>  pmd thread numa_id <cleared> core_id <cleared>:
>    packets received: 0
>    packet recirculations: 0
>    avg. datapath passes per packet: 0.00
>    emc hits: 0
> +  emc entries: 0 (0.00%)
>    smc hits: 0
>    megaflow hits: 0
>    avg. subtable lookups per megaflow hit: 0.00
> @@ -233,12 +234,13 @@ AT_CHECK([cat ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl
>  recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:77,dst=50:54:00:00:01:78),eth_type(0x0800),ipv4(frag=no), actions: <del>
>  ])
>
> -AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 9], [0], [dnl
> +AT_CHECK([ovs-appctl dpif-netdev/pmd-stats-show | sed SED_NUMA_CORE_PATTERN | sed '/cycles/d' | grep pmd -A 10], [0], [dnl
>  pmd thread numa_id <cleared> core_id <cleared>:
>    packets received: 20
>    packet recirculations: 0
>    avg. datapath passes per packet: 1.00
>    emc hits: 19
> +  emc entries: 1 (0.01%)
>    smc hits: 0
>    megaflow hits: 0
>    avg. subtable lookups per megaflow hit: 0.00
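One more thought, on non_atomic_inc()/non_atomic_dec() above: as I read it,
the scheme relies on the counter having a single writer (the PMD thread that
owns the cache), with other threads only doing relaxed reads when stats are
dumped. A minimal standalone sketch of that pattern, using C11 stdatomic
rather than the OVS atomic wrappers (names are mine, not from the patch):

  #include <inttypes.h>
  #include <stdatomic.h>
  #include <stdio.h>

  static atomic_uint_fast32_t n_entries;

  /* Writer side: called only from the single owning thread, so a relaxed
   * load + store (rather than an atomic read-modify-write) cannot lose
   * updates. */
  static void
  counter_inc(atomic_uint_fast32_t *var)
  {
      uint_fast32_t tmp = atomic_load_explicit(var, memory_order_relaxed);

      atomic_store_explicit(var, tmp + 1, memory_order_relaxed);
  }

  /* Reader side: safe from any thread; the value may lag the writer
   * slightly but is never torn. */
  static uint_fast32_t
  counter_read(atomic_uint_fast32_t *var)
  {
      return atomic_load_explicit(var, memory_order_relaxed);
  }

  int
  main(void)
  {
      counter_inc(&n_entries);
      printf("entries: %" PRIuFAST32 "\n", counter_read(&n_entries));
      return 0;
  }

If anything other than the stats dump ever needs an exact instantaneous
value from another thread, these would have to become real atomic RMWs.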

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev