Unconditional insertion of EMC entries results in EMC thrashing at high numbers of parallel flows. When this occurs, the performance of the EMC often falls below that of the dpcls classifier, rendering the EMC practically useless.
Instead of unconditionally inserting entries into the EMC when a miss occurs, use a 1% probability of insertion. This ensures that the most frequent flows have the highest chance of creating an entry in the EMC, and the probability of thrashing the EMC is also greatly reduced. The probability of insertion is configurable via the other_config:emc-insert-prob option. For example, the following command increases the insertion probability to 1/10, i.e. 10%: ovs-vsctl set Open_vSwitch . other_config:emc-insert-prob=10 Signed-off-by: Ciara Loftus <[email protected]> Signed-off-by: Georg Schmuecking <[email protected]> Co-authored-by: Georg Schmuecking <[email protected]> --- v2: - Enable probability configurability via other_config:emc-insert-prob option. Documentation/howto/dpdk.rst | 23 +++++++++++++++++++++++ NEWS | 2 ++ lib/dpdk.c | 15 +++++++++++++++ lib/dpdk.h | 1 + lib/dpif-netdev.c | 28 ++++++++++++++++++++++++++-- vswitchd/vswitch.xml | 17 +++++++++++++++++ 6 files changed, 84 insertions(+), 2 deletions(-) diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index d1e6e89..a37b9d5 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -354,6 +354,29 @@ the `DPDK documentation Note: Not all DPDK virtual PMD drivers have been tested and verified to work. +EMC Insertion Probability +------------------------- +By default, 1 in every 100 flows is inserted into the Exact Match Cache (EMC). +It is possible to change this insertion probability by setting the +``emc-insert-prob`` option:: + + $ ovs-vsctl --no-wait set Open_vSwitch . other_config:emc-insert-prob=N + +where: + +``N`` + is a non-negative integer between 0 and 4294967295. + +If ``N`` is set to 1, an insertion will be performed for every flow. The lower +the value of ``emc-insert-prob``, the higher the probability of insertion, +except for the value 0, which will result in no insertions being performed, +thus essentially disabling the EMC. 
+ +If ``emc-insert-prob`` is modified, the daemon needs to be restarted in order +for the changes to take effect. + +For more information on the EMC refer to :doc:`/intro/install/dpdk` . + .. _dpdk-ovs-in-guest: OVS with DPDK Inside VMs diff --git a/NEWS b/NEWS index 0a9551c..8fb1f53 100644 --- a/NEWS +++ b/NEWS @@ -63,6 +63,8 @@ Post-v2.6.0 device will not be available for use until a valid dpdk-devargs is specified. * Virtual DPDK Poll Mode Driver (vdev PMD) support. + * New 'other_config:emc-insert-prob' field for userspace netdevs that + allows definition of the EMC insertion probability. - Fedora packaging: * A package upgrade does not automatically restart OVS service. - ovs-vswitchd/ovs-vsctl: diff --git a/lib/dpdk.c b/lib/dpdk.c index 9ae2491..bb9e758 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -38,6 +38,8 @@ VLOG_DEFINE_THIS_MODULE(dpdk); static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ +static uint32_t emc_insert_min = UINT32_MAX / 100; + static int process_vhost_flags(char *flag, char *default_val, int size, const struct smap *ovs_other_config, @@ -272,6 +274,7 @@ dpdk_init__(const struct smap *ovs_other_config) int err = 0; cpu_set_t cpuset; char *sock_dir_subcomponent; + int insert_prob; if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()), NAME_MAX, ovs_other_config, @@ -297,6 +300,12 @@ dpdk_init__(const struct smap *ovs_other_config) vhost_sock_dir = sock_dir_subcomponent; } + /* Configure EMC insertion probability */ + insert_prob = smap_get_int(ovs_other_config, "emc-insert-prob", -1); + if (insert_prob >= 0 && insert_prob <= UINT32_MAX) { + emc_insert_min = insert_prob == 0 ? 
0 : UINT32_MAX / insert_prob; + } + argv = grow_argv(&argv, 0, 1); argc = 1; argv[0] = xstrdup(ovs_get_program_name()); @@ -438,6 +447,12 @@ dpdk_get_vhost_sock_dir(void) return vhost_sock_dir; } +uint32_t +dpdk_get_emc_insert_min(void) +{ + return emc_insert_min; +} + void dpdk_set_lcore_id(unsigned cpu) { diff --git a/lib/dpdk.h b/lib/dpdk.h index 673a1f1..ebe614a 100644 --- a/lib/dpdk.h +++ b/lib/dpdk.h @@ -35,5 +35,6 @@ struct smap; void dpdk_init(const struct smap *ovs_other_config); void dpdk_set_lcore_id(unsigned cpu); const char *dpdk_get_vhost_sock_dir(void); +uint32_t dpdk_get_emc_insert_min(void); #endif /* dpdk.h */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 42631bc..71a54d1 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -33,6 +33,7 @@ #ifdef DPDK_NETDEV #include <rte_cycles.h> +#include "dpdk.h" #endif #include "bitmap.h" @@ -144,6 +145,8 @@ struct netdev_flow_key { #define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1) #define EM_FLOW_HASH_SEGS 2 +uint32_t em_flow_insert_min = UINT32_MAX / 100; + struct emc_entry { struct dp_netdev_flow *flow; struct netdev_flow_key key; /* key.hash used for emc hash value. */ @@ -1943,6 +1946,26 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key, emc_change_entry(to_be_replaced, flow, key); } +static inline void +emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd, + struct emc_cache *cache, + const struct netdev_flow_key *key, + struct dp_netdev_flow *flow) +{ + /* Insert an entry into the EMC based on a probability governed by + * 'em_flow_insert_min'. By default the value is UINT32_MAX / 100 which + * yields an insertion probability of 1/100. This value may be different + * for the DPDK datapath depending on whether or not the user has + * configured the 'emc-insert-prob' option. 
*/ +#ifdef DPDK_NETDEV + if ((key->hash ^ (uint32_t)pmd->last_cycles) < em_flow_insert_min) { +#else + if ((key->hash ^ random_uint32()) < em_flow_insert_min) { +#endif + emc_insert(cache, key, flow); + } +} + static inline struct dp_netdev_flow * emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key) { @@ -3530,6 +3553,7 @@ pmd_thread_main(void *f_) ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); ovs_numa_thread_setaffinity_core(pmd->core_id); dpdk_set_lcore_id(pmd->core_id); + em_flow_insert_min = dpdk_get_emc_insert_min(); poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); reload: emc_cache_init(&pmd->flow_cache); @@ -4191,7 +4215,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet, } ovs_mutex_unlock(&pmd->flow_mutex); - emc_insert(&pmd->flow_cache, key, netdev_flow); + emc_probabilistic_insert(pmd, &pmd->flow_cache, key, netdev_flow); } } @@ -4285,7 +4309,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, flow = dp_netdev_flow_cast(rules[i]); - emc_insert(flow_cache, &keys[i], flow); + emc_probabilistic_insert(pmd, flow_cache, &keys[i], flow); dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches); } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index c71ee29..4c4a4f1 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -316,6 +316,23 @@ </p> </column> + <column name="other_config" key="emc-insert-prob" + type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'> + <p> + Specifies the probability (1/emc-insert-prob) of a flow being + inserted into the Exact Match Cache (EMC). Higher values of + emc-insert-prob will result in fewer insertions, and lower + values will result in more insertions. A value of 1 will result in + an insertion for every flow, whereas a value of zero will result in + no insertions and essentially disable the EMC. + </p> + <p> + Defaults to 100, i.e. there is a 1/100 chance of EMC insertion. 
Only + applies to <code>netdev</code> (userspace) bridges. Changing this + value requires restarting the daemon. + </p> + </column> + <column name="other_config" key="n-revalidator-threads" type='{"type": "integer", "minInteger": 1}'> <p> -- 2.4.11 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
