Conditional EMC insert helps a lot in scenarios with high numbers
of parallel flows, but in the current implementation this option affects
all the threads and ports at once. There are scenarios where we have
different numbers of flows on different ports. For example, if one
of the VMs encapsulates traffic using additional headers, it will
receive a large number of flows, but only a few flows will come out of
this VM. In this scenario it's much faster to use the EMC instead of
the classifier for traffic from the VM, but it's better to disable the
EMC for the traffic which flows to the VM.

To handle the above issue, 'emc-insert-inv-prob' was converted to a
per-port option. The default value and behaviour are kept as is.

For example, the following command sets the insertion probability for
packets that come from port 'dpdk0' to ~1/20, i.e. ~5%:

  ovs-vsctl set interface dpdk0 other_config:emc-insert-inv-prob=20

Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
---
 Documentation/howto/dpdk.rst |   4 +-
 NEWS                         |   2 +-
 lib/dpif-netdev.c            | 106 +++++++++++++++++++++++++++++--------------
 tests/pmd.at                 |   7 ++-
 vswitchd/vswitch.xml         |  42 ++++++++++-------
 5 files changed, 106 insertions(+), 55 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d7f6610..c620961 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -389,9 +389,9 @@ EMC Insertion Probability
 -------------------------
 By default 1 in every 100 flows are inserted into the Exact Match Cache (EMC).
 It is possible to change this insertion probability by setting the
-``emc-insert-inv-prob`` option::
+``emc-insert-inv-prob`` option for the desired interface::
 
-    $ ovs-vsctl --no-wait set Open_vSwitch . other_config:emc-insert-inv-prob=N
+    $ ovs-vsctl set interface <iface> other_config:emc-insert-inv-prob=N
 
 where:
 
diff --git a/NEWS b/NEWS
index 66eb936..a7bfdaf 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,6 @@
 Post-v2.8.0
 --------------------
-   - Nothing yet.
+   - EMC insertion probability is now a per-port other_config option.
 
 v2.8.0 - xx xxx xxxx
 ---------------------
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6d42393..94e7bc4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -266,9 +266,6 @@ struct dp_netdev {
     struct ovs_mutex meter_locks[N_METER_LOCKS];
     struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
 
-    /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
-    OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
-
     /* Protects access to ofproto-dpif-upcall interface during revalidator
      * thread synchronization. */
     struct fat_rwlock upcall_rwlock;
@@ -364,6 +361,8 @@ struct dp_netdev_port {
     unsigned n_rxq;             /* Number of elements in 'rxqs' */
     unsigned *txq_used;         /* Number of threads that use each tx queue. */
     struct ovs_mutex txq_used_mutex;
+    uint32_t emc_insert_min;    /* Probability of EMC insertions is a factor
+                                 * of 'emc_insert_min'. */
     char *type;                 /* Port type as requested by user. */
     char *rxq_affinity_list;    /* Requested affinity of rx queues. */
 };
@@ -487,6 +486,7 @@ struct dp_netdev_pmd_cycles {
 struct polled_queue {
     struct netdev_rxq *rx;
     odp_port_t port_no;
+    uint32_t emc_insert_min;
 };
 
 /* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
@@ -508,6 +508,8 @@ struct tx_port {
 struct dp_netdev_pmd_thread_ctx {
     /* Latest measured time. */
     long long now;
+    /* EMC insertion probability context for the current processing cycle. */
+    uint32_t emc_insert_min;
     /* Used to count cycles. See 'cycles_count_end()' */
     unsigned long long last_cycles;
 };
@@ -1202,8 +1204,6 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
 
     conntrack_init(&dp->conntrack);
 
-    atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
-
     cmap_init(&dp->poll_threads);
 
     ovs_mutex_init(&dp->tx_qid_pool_mutex);
@@ -1485,6 +1485,7 @@ port_create(const char *devname, const char *type,
     port->netdev = netdev;
     port->type = xstrdup(type);
     port->sf = sf;
+    port->emc_insert_min = DEFAULT_EM_FLOW_INSERT_MIN;
     port->need_reconfigure = true;
     ovs_mutex_init(&port->txq_used_mutex);
 
@@ -2104,8 +2105,7 @@ emc_probabilistic_insert(struct dp_netdev_pmd_thread *pmd,
      * default the value is UINT32_MAX / 100 which yields an insertion
      * probability of 1/100 ie. 1% */
 
-    uint32_t min;
-    atomic_read_relaxed(&pmd->dp->emc_insert_min, &min);
+    uint32_t min = pmd->ctx.emc_insert_min;
 
     if (min && random_uint32() <= min) {
         emc_insert(&pmd->flow_cache, key, flow);
@@ -2914,10 +2914,6 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     const char *cmask = smap_get(other_config, "pmd-cpu-mask");
-    unsigned long long insert_prob =
-        smap_get_ullong(other_config, "emc-insert-inv-prob",
-                        DEFAULT_EM_FLOW_INSERT_INV_PROB);
-    uint32_t insert_min, cur_min;
 
     if (!nullable_string_is_equal(dp->pmd_cmask, cmask)) {
         free(dp->pmd_cmask);
@@ -2925,24 +2921,6 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
         dp_netdev_request_reconfigure(dp);
     }
 
-    atomic_read_relaxed(&dp->emc_insert_min, &cur_min);
-    if (insert_prob <= UINT32_MAX) {
-        insert_min = insert_prob == 0 ? 0 : UINT32_MAX / insert_prob;
-    } else {
-        insert_min = DEFAULT_EM_FLOW_INSERT_MIN;
-        insert_prob = DEFAULT_EM_FLOW_INSERT_INV_PROB;
-    }
-
-    if (insert_min != cur_min) {
-        atomic_store_relaxed(&dp->emc_insert_min, insert_min);
-        if (insert_min == 0) {
-            VLOG_INFO("EMC has been disabled");
-        } else {
-            VLOG_INFO("EMC insertion probability changed to 1/%llu (~%.2f%%)",
-                      insert_prob, (100 / (float)insert_prob));
-        }
-    }
-
     return 0;
 }
 
@@ -3005,6 +2983,27 @@ exit:
     return error;
 }
 
+/* Returns 'true' if at least one rxq in 'pmd->poll_list' belongs to 'port'.
+ * Locks 'pmd->port_mutex' for the scan, so the caller must not hold it. */
+static bool
+dpif_netdev_pmd_polls_port(struct dp_netdev_pmd_thread *pmd,
+                           struct dp_netdev_port *port)
+    OVS_EXCLUDED(pmd->port_mutex)
+{
+    struct rxq_poll *poll;
+    bool found = false;
+
+    ovs_mutex_lock(&pmd->port_mutex);
+    HMAP_FOR_EACH (poll, node, &pmd->poll_list) {
+        if (port == poll->rxq->port) {
+            found = true;
+            break;
+        }
+    }
+    ovs_mutex_unlock(&pmd->port_mutex);
+    return found;
+}
+
 /* Changes the affinity of port's rx queues.  The changes are actually applied
  * in dpif_netdev_run(). */
 static int
@@ -3015,10 +3014,50 @@ dpif_netdev_port_set_config(struct dpif *dpif, odp_port_t port_no,
     struct dp_netdev_port *port;
     int error = 0;
     const char *affinity_list = smap_get(cfg, "pmd-rxq-affinity");
+    unsigned long long insert_prob =
+        smap_get_ullong(cfg, "emc-insert-inv-prob",
+                        DEFAULT_EM_FLOW_INSERT_INV_PROB);
+    uint32_t insert_min;
 
     ovs_mutex_lock(&dp->port_mutex);
     error = get_port_by_number(dp, port_no, &port);
-    if (error || !netdev_is_pmd(port->netdev)
+    if (error) {
+        goto unlock;
+    }
+
+    /* Checking for EMC insert probability changes. */
+    if (insert_prob <= UINT32_MAX) {
+        insert_min = insert_prob == 0 ? 0 : UINT32_MAX / insert_prob;
+    } else {
+        insert_min = DEFAULT_EM_FLOW_INSERT_MIN;
+        insert_prob = DEFAULT_EM_FLOW_INSERT_INV_PROB;
+    }
+
+    if (insert_min != port->emc_insert_min) {
+        struct dp_netdev_pmd_thread *pmd;
+
+        port->emc_insert_min = insert_min;
+        /* Mark for reload all the threads that polls this port and request
+         * for reconfiguration for the actual reloading of threads. */
+        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+            if (dpif_netdev_pmd_polls_port(pmd, port)) {
+                pmd->need_reload = true;
+            }
+        }
+        dp_netdev_request_reconfigure(dp);
+
+        if (insert_min == 0) {
+            VLOG_INFO("%s: EMC has been disabled",
+                       netdev_get_name(port->netdev));
+        } else {
+            VLOG_INFO("%s: EMC insertion probability changed to 1/%llu"
+                      " (~%.2f%%)", netdev_get_name(port->netdev),
+                      insert_prob, (100 / (float)insert_prob));
+        }
+    }
+
+    /* Checking for RXq affinity changes. */
+    if (!netdev_is_pmd(port->netdev)
         || nullable_string_is_equal(affinity_list, port->rxq_affinity_list)) {
         goto unlock;
     }
@@ -3680,6 +3719,7 @@ dpif_netdev_run(struct dpif *dpif)
                 int i;
 
                 for (i = 0; i < port->n_rxq; i++) {
+                    non_pmd->ctx.emc_insert_min = port->emc_insert_min;
                     process_packets =
                         dp_netdev_process_rxq_port(non_pmd,
                                                    port->rxqs[i].rx,
@@ -3818,6 +3858,7 @@ pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
     HMAP_FOR_EACH (poll, node, &pmd->poll_list) {
         poll_list[i].rx = poll->rxq->rx;
         poll_list[i].port_no = poll->rxq->port->port_no;
+        poll_list[i].emc_insert_min = poll->rxq->port->emc_insert_min;
         i++;
     }
 
@@ -3869,6 +3910,7 @@ reload:
     cycles_count_start(pmd);
     for (;;) {
         for (i = 0; i < poll_cnt; i++) {
+            pmd->ctx.emc_insert_min = poll_list[i].emc_insert_min;
             process_packets =
                 dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
                                            poll_list[i].port_no);
@@ -4693,11 +4735,9 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
     size_t n_missed = 0, n_dropped = 0;
     struct dp_packet *packet;
     const size_t size = dp_packet_batch_size(packets_);
-    uint32_t cur_min;
+    uint32_t cur_min = pmd->ctx.emc_insert_min;
     int i;
 
-    atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
-
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
         struct dp_netdev_flow *flow;
 
diff --git a/tests/pmd.at b/tests/pmd.at
index b6732ea..fdb2eee 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -151,7 +151,11 @@ AT_CLEANUP
 
 
 AT_SETUP([PMD - stats])
-OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 ofport_request=7 type=dummy-pmd options:n_rxq=4],
+OVS_VSWITCHD_START([add-port br0 p0 --\
+                        set Interface p0 ofport_request=7 \
+                                         type=dummy-pmd \
+                                         other_config:emc-insert-inv-prob=1 \
+                                         options:n_rxq=4 ],
                    [], [], [DUMMY_NUMA])
 
 CHECK_CPU_DISCOVERED()
@@ -159,7 +163,6 @@ CHECK_PMD_THREADS_CREATED()
 
 AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg])
 AT_CHECK([ovs-ofctl add-flow br0 action=normal])
-AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:emc-insert-inv-prob=1])
 
 sleep 1
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 074535b..90eb3ea 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -380,23 +380,6 @@
         </p>
       </column>
 
-      <column name="other_config" key="emc-insert-inv-prob"
-              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
-        <p>
-          Specifies the inverse probability (1/emc-insert-inv-prob) of a flow
-          being inserted into the Exact Match Cache (EMC). On average one in
-          every <code>emc-insert-inv-prob</code> packets that generate a unique
-          flow will cause an insertion into the EMC.
-
-          A value of 1 will result in an insertion for every flow (1/1 = 100%)
-          whereas a value of zero will result in no insertions and essentially
-          disable the EMC.
-        </p>
-        <p>
-          Defaults to 100 ie. there is (1/100 =) 1% chance of EMC insertion.
-        </p>
-      </column>
-
       <column name="other_config" key="vlan-limit"
               type='{"type": "integer", "minInteger": 0}'>
         <p>
@@ -2644,6 +2627,31 @@
       </column>
     </group>
 
+    <group title="EMC (Exact Match Cache) Configuration">
+      <p>
+        These settings control the behaviour of EMC lookups/insertions for
+        packets received from the interface.
+      </p>
+
+      <column name="other_config" key="emc-insert-inv-prob"
+              type='{"type": "integer",
+                     "minInteger": 0, "maxInteger": 4294967295}'>
+        <p>
+          Specifies the inverse probability (1/emc-insert-inv-prob) of a flow
+          being inserted into the Exact Match Cache (EMC) for this interface.
+          On average one in every <code>emc-insert-inv-prob</code> packets that
+          generate a unique flow will cause an insertion into the EMC.
+
+          A value of 1 will result in an insertion for every flow (1/1 = 100%)
+          whereas a value of zero will result in no insertions and essentially
+          disable the EMC for this interface.
+        </p>
+        <p>
+          Defaults to 100 ie. there is (1/100 =) 1% chance of EMC insertion.
+        </p>
+      </column>
+    </group>
+
     <group title="MTU">
       <p>
         The MTU (maximum transmission unit) is the largest amount of data
-- 
2.7.4

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to