On 5 Nov 2019, at 23:16, William Tu wrote:

The RFC patch enables shared umem support. It requires kernel and libbpf changes, which I will post in another thread. I tested with multiple afxdp ports in skb mode. For example:

ovs-vsctl -- set interface afxdp-p0 options:n_rxq=1 type="afxdp" options:xdpmode=skb
ovs-vsctl -- set interface afxdp-p1 options:n_rxq=1 type="afxdp" options:xdpmode=skb

The two ports will then share one umem instead of two.

Note that once a shared umem is created with a specific bind mode (e.g. XDP_COPY), a netdev sharing that umem cannot change its mode. So I'm thinking about using one shared umem for all skb-mode netdevs, and keeping a dedicated umem for each drv-mode netdev. Or should we create one umem per mode, so that the drv-mode netdevs also share one umem?
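To make the per-mode option concrete, here is a minimal sketch (the slot enum and the get_shared_umem() helper are hypothetical names, not part of this patch) that keys the shared umem on the bind mode:

    /* Hypothetical sketch: one shared umem slot per bind mode, so all
     * skb-mode netdevs share one pool and all drv-mode netdevs another. */
    enum shared_umem_slot { UMEM_SLOT_SKB, UMEM_SLOT_DRV, UMEM_SLOT_MAX };

    static struct xsk_umem_info *shared_umems[UMEM_SLOT_MAX];

    static struct xsk_umem_info *
    get_shared_umem(int xdpmode)
    {
        enum shared_umem_slot slot = xdpmode == XDP_ZEROCOPY
                                     ? UMEM_SLOT_DRV : UMEM_SLOT_SKB;
        struct xsk_umem_info *umem = shared_umems[slot];

        if (umem) {
            umem->refcount++;  /* Existing pool for this mode: reuse it. */
        }
        return umem;           /* NULL: caller creates and registers one. */
    }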


Hi William,

I did not go through the entire patch, but wasn't Magnus hinting at not sharing the umem rings, only the buffers they point to?

That way you do not need any locking for the ring operations, only when getting buffers. I guess this is more like the mbuf implementation in DPDK.
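For illustration, a rough sketch of that scheme (the wrapper below is a hypothetical name; umem_elem_pop_n() is the pool helper this patch already calls): each socket keeps private fill/completion rings, and only the shared element pool takes a lock, much like mempool get/put in DPDK:

    /* Hypothetical sketch: lock only the common buffer pool; the
     * per-socket fill/completion ring operations stay lock-free. */
    static struct ovs_mutex shared_pool_mutex = OVS_MUTEX_INITIALIZER;

    static int
    shared_pool_pop_n(struct umem_pool *pool, int n, void **elems)
    {
        int ret;

        ovs_mutex_lock(&shared_pool_mutex);
        ret = umem_elem_pop_n(pool, n, elems);  /* Only contended path. */
        ovs_mutex_unlock(&shared_pool_mutex);

        return ret;
    }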

//Eelco

Any comments are welcome.

Suggested-by: Eelco Chaudron <[email protected]>
Signed-off-by: William Tu <[email protected]>
---
lib/netdev-afxdp.c | 97 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 27 deletions(-)

diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 3037770b27cb..42767e1e27f3 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -95,6 +95,8 @@ static void xsk_destroy(struct xsk_socket_info *xsk);
 static int xsk_configure_all(struct netdev *netdev);
 static void xsk_destroy_all(struct netdev *netdev);

+static struct xsk_umem_info *shared_umem;
+
 struct unused_pool {
     struct xsk_umem_info *umem_info;
     int lost_in_rings; /* Number of packets left in tx, rx, cq and fq. */
@@ -112,6 +114,8 @@ struct xsk_umem_info {
     struct xsk_ring_cons cq;
     struct xsk_umem *umem;
     void *buffer;
+    int refcount;
+    struct ovs_mutex mutex;
 };

 struct xsk_socket_info {
@@ -228,6 +232,7 @@ xsk_configure_umem(void *buffer, uint64_t size, int xdpmode)
     uconfig.comp_size = CONS_NUM_DESCS;
     uconfig.frame_size = FRAME_SIZE;
     uconfig.frame_headroom = OVS_XDP_HEADROOM;
+    ovs_mutex_init(&umem->mutex);

     ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
                            &uconfig);
@@ -296,6 +301,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     struct xsk_socket_info *xsk;
     char devname[IF_NAMESIZE];
     uint32_t idx = 0, prog_id;
+    bool shared;
     int ret;
     int i;

@@ -304,6 +310,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     cfg.rx_size = CONS_NUM_DESCS;
     cfg.tx_size = PROD_NUM_DESCS;
     cfg.libbpf_flags = 0;
+    shared = umem->refcount > 1;

     if (xdpmode == XDP_ZEROCOPY) {
         cfg.bind_flags = XDP_ZEROCOPY;
@@ -319,6 +326,10 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
     }
 #endif

+    if (shared) {
+        cfg.bind_flags = XDP_SHARED_UMEM;
+    }
+
     if (if_indextoname(ifindex, devname) == NULL) {
         VLOG_ERR("ifindex %d to devname failed (%s)",
                  ifindex, ovs_strerror(errno));
@@ -352,6 +363,11 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
         return NULL;
     }

+    if (shared) {
+        return xsk;
+    }
+
+    /* Only the first socket on a umem needs to populate the FILL queue. */
     while (!xsk_ring_prod__reserve(&xsk->umem->fq,
                                    PROD_NUM_DESCS, &idx)) {
         VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
@@ -380,33 +396,43 @@ xsk_configure(int ifindex, int xdp_queue_id, int xdpmode,
 {
     struct xsk_socket_info *xsk;
     struct xsk_umem_info *umem;
-    void *bufs;
+    void *bufs = NULL;

     netdev_afxdp_sweep_unused_pools(NULL);

-    /* Umem memory region. */
-    bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
-    memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
+    if (!shared_umem) {
+        /* Umem memory region. */
+        bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
+        memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
+
+        /* Create AF_XDP socket. */
+        umem = xsk_configure_umem(bufs,
+                                  NUM_FRAMES * FRAME_SIZE,
+                                  xdpmode);
+        if (!umem) {
+            free_pagealign(bufs);
+            return NULL;
+        }

-    /* Create AF_XDP socket. */
-    umem = xsk_configure_umem(bufs,
-                              NUM_FRAMES * FRAME_SIZE,
-                              xdpmode);
-    if (!umem) {
-        free_pagealign(bufs);
-        return NULL;
+        shared_umem = umem;
+        umem->refcount++;
+        VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
+    } else {
+        umem = shared_umem;
+        umem->refcount++;
     }

-    VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
-
     xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, xdpmode,
                                use_need_wakeup);
-    if (!xsk) {
+    if (!xsk && !--umem->refcount) {
         /* Clean up umem and xpacket pool. */
+        shared_umem = NULL;
         if (xsk_umem__delete(umem->umem)) {
             VLOG_ERR("xsk_umem__delete failed.");
         }
-        free_pagealign(bufs);
+        if (bufs) {
+            free_pagealign(bufs);
+        }
         umem_pool_cleanup(&umem->mpool);
         xpacket_pool_cleanup(&umem->xpool);
         free(umem);
@@ -472,21 +498,29 @@ xsk_destroy(struct xsk_socket_info *xsk_info)
     xsk_info->xsk = NULL;

     umem = xsk_info->umem->umem;
-    if (xsk_umem__delete(umem)) {
-        VLOG_ERR("xsk_umem__delete failed.");
-    }
+    xsk_info->umem->refcount--;

-    pool = xzalloc(sizeof *pool);
-    pool->umem_info = xsk_info->umem;
-    pool->lost_in_rings = xsk_info->outstanding_tx + xsk_info->available_rx;
+    if (!xsk_info->umem->refcount) {
+        VLOG_WARN("destroy umem");
+        shared_umem = NULL;
+        if (xsk_umem__delete(umem)) {
+            VLOG_ERR("xsk_umem__delete failed.");
+        }
+        ovs_mutex_destroy(&xsk_info->umem->mutex);

-    ovs_mutex_lock(&unused_pools_mutex);
-    ovs_list_push_back(&unused_pools, &pool->list_node);
-    ovs_mutex_unlock(&unused_pools_mutex);
+        pool = xzalloc(sizeof *pool);
+        pool->umem_info = xsk_info->umem;
+        pool->lost_in_rings = xsk_info->outstanding_tx +
+                              xsk_info->available_rx;

-    free(xsk_info);
+        ovs_mutex_lock(&unused_pools_mutex);
+        ovs_list_push_back(&unused_pools, &pool->list_node);
+        ovs_mutex_unlock(&unused_pools_mutex);

-    netdev_afxdp_sweep_unused_pools(NULL);
+        free(xsk_info);
+
+        netdev_afxdp_sweep_unused_pools(NULL);
+    }
 }

 static void
@@ -733,11 +767,14 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
     prepare_fill_queue(xsk_info);

     umem = xsk_info->umem;
+    ovs_mutex_lock(&umem->mutex);
+
     rx->fd = xsk_socket__fd(xsk_info->xsk);

     rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
     if (!rcvd) {
         xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
+        ovs_mutex_unlock(&umem->mutex);
         return EAGAIN;
     }

@@ -778,6 +815,7 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
         /* TODO: return the number of remaining packets in the queue. */
         *qfill = 0;
     }
+    ovs_mutex_unlock(&umem->mutex);
     return 0;
 }

@@ -924,7 +962,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     struct netdev_linux *dev = netdev_linux_cast(netdev);
     struct xsk_socket_info *xsk_info;
     void *elems_pop[BATCH_SIZE];
-    struct xsk_umem_info *umem;
+    struct xsk_umem_info *umem = NULL;
     struct dp_packet *packet;
     bool free_batch = false;
     unsigned long orig;
@@ -942,6 +980,8 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     free_batch = check_free_batch(batch);

     umem = xsk_info->umem;
+    ovs_mutex_lock(&umem->mutex);
+
     ret = umem_elem_pop_n(&umem->mpool, dp_packet_batch_size(batch),
                           elems_pop);
     if (OVS_UNLIKELY(ret)) {
@@ -993,6 +1033,9 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
     }

 out:
+    if (umem) {
+        ovs_mutex_unlock(&umem->mutex);
+    }
     if (free_batch) {
         free_afxdp_buf_batch(batch);
     } else {
--
2.7.4

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
