DPDK mempools are freed when they are no longer needed.
This can happen when a port is removed or when a port's MTU
is reconfigured so that a new mempool is used.

It is possible for an mbuf to be returned from a NIC Tx
queue to a mempool that has already been freed, and this
can lead to a segfault.

In order to prevent this, only free mempools when they
are not needed and have no in-use mbufs. As this might
not be possible immediately, sweep the mempools anytime
a port tries to get a mempool.

Fixes: 8d38823bdf8b ("netdev-dpdk: fix memory leak")
Cc: mark.b.kavanag...@gmail.com
Cc: Ilya Maximets <i.maxim...@samsung.com>
Reported-by: Venkatesan Pradeep <venkatesan.prad...@ericsson.com>
Signed-off-by: Kevin Traynor <ktray...@redhat.com>
---

v3: Get number of entries on the mempool ring directly.

v2: Add second call to rte_mempool_full() as it is not atomic
    so we can't trust one call to it.

 lib/netdev-dpdk.c | 58 ++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c19cedc..d8fb222 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -590,8 +590,42 @@ dpdk_mp_create(int socket_id, int mtu)
 }
 
+static int
+dpdk_mp_full(const struct rte_mempool *mp) OVS_REQUIRES(dpdk_mp_mutex)
+{
+    unsigned ring_count;
+    /* Returns 1 if the mempool is full and its ring count is the same
+     * before and after the check, else 0. rte_mempool_full() is not
+     * guaranteed to be atomic: mbufs could move from mempool cache -->
+     * mempool ring during the call. As no mbufs will be taken from the
+     * mempool at this time, comparing the ring count works around that.
+     */
+    ring_count = rte_mempool_ops_get_count(mp);
+    if (rte_mempool_full(mp) && rte_mempool_ops_get_count(mp) == ring_count) {
+        return 1;
+    }
+
+    return 0;
+}
+
+/* Free mempools with zero refcount that dpdk_mp_full() reports as full. */
+static void
+dpdk_mp_sweep(void) OVS_REQUIRES(dpdk_mp_mutex)
+{
+    struct dpdk_mp *dmp, *next;
+
+    LIST_FOR_EACH_SAFE (dmp, next, list_node, &dpdk_mp_list) {
+        if (!dmp->refcount && dpdk_mp_full(dmp->mp)) {
+            ovs_list_remove(&dmp->list_node);
+            rte_mempool_free(dmp->mp);
+            rte_free(dmp);
+        }
+    }
+}
+
 static struct dpdk_mp *
 dpdk_mp_get(int socket_id, int mtu)
 {
     struct dpdk_mp *dmp;
+    bool reuse = false;
 
     ovs_mutex_lock(&dpdk_mp_mutex);
@@ -599,15 +633,18 @@ dpdk_mp_get(int socket_id, int mtu)
         if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
             dmp->refcount++;
-            goto out;
+            reuse = true;
+            break;
         }
     }
+    /* Sweep mempools after reuse or before create. */
+    dpdk_mp_sweep();
 
-    dmp = dpdk_mp_create(socket_id, mtu);
-    if (dmp) {
-        ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
+    if (!reuse) {
+        dmp = dpdk_mp_create(socket_id, mtu);
+        if (dmp) {
+            ovs_list_push_back(&dpdk_mp_list, &dmp->list_node);
+        }
     }
 
-out:
-
     ovs_mutex_unlock(&dpdk_mp_mutex);
 
@@ -615,5 +652,5 @@ out:
 }
 
-/* Release an existing mempool. */
+/* Decrement reference to a mempool. */
 static void
 dpdk_mp_put(struct dpdk_mp *dmp)
@@ -625,10 +662,5 @@ dpdk_mp_put(struct dpdk_mp *dmp)
     ovs_mutex_lock(&dpdk_mp_mutex);
     ovs_assert(dmp->refcount);
-
-    if (!--dmp->refcount) {
-        ovs_list_remove(&dmp->list_node);
-        rte_mempool_free(dmp->mp);
-        rte_free(dmp);
-     }
+    dmp->refcount--;
     ovs_mutex_unlock(&dpdk_mp_mutex);
 }
-- 
1.8.3.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to