Currently, XDP_RING_NEED_WAKEUP is not properly supported by virtio-net.
Take the tx path for example: we call xsk_set_tx_need_wakeup() in
virtnet_xsk_xmit(), but we never call xsk_clear_tx_need_wakeup()
anywhere, which means the user ends up calling send() for every packet.

For the tx path, we will call xsk_set_tx_need_wakeup() after
virtnet_xsk_xmit_batch() if sq->vq is empty, as we can't be woken up by
skb_xmit_done() in this case. Otherwise, we will clear the wakeup flag.

For the rx path, we will call xsk_set_rx_need_wakeup() if we have free
buffers in rq->vq.

Race conditions are considered for both the rx and tx paths.

Signed-off-by: Menglong Dong <[email protected]>
---
 drivers/net/virtio_net.c | 52 ++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9b3da9f9786c..25e895b849a6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1323,16 +1323,27 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info 
*vi, struct receive_queue
                                   struct xsk_buff_pool *pool, gfp_t gfp)
 {
        struct xdp_buff **xsk_buffs;
+       bool need_wakeup;
        dma_addr_t addr;
        int err = 0;
        u32 len, i;
        int num;
 
+       need_wakeup = xsk_uses_need_wakeup(pool);
        xsk_buffs = rq->xsk_buffs;
 
+       /* If both rq->vq and fill ring are empty, and then the user submit
+        * all the chunks to the fill ring and check the wake up flag
+        * after xsk_buff_alloc_batch() and before xsk_set_rx_need_wakeup(),
+        * we will lose the chance to wake up the rx napi, so we have to
+        * set the need_wakeup flag here.
+        */
+       if (need_wakeup && virtqueue_get_vring_size(rq->vq) == rq->vq->num_free)
+               xsk_set_rx_need_wakeup(pool);
+
        num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free);
        if (!num) {
-               if (xsk_uses_need_wakeup(pool)) {
+               if (need_wakeup) {
                        xsk_set_rx_need_wakeup(pool);
                        /* Return 0 instead of -ENOMEM so that NAPI is
                         * descheduled.
@@ -1341,8 +1352,6 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info 
*vi, struct receive_queue
                }
 
                return -ENOMEM;
-       } else {
-               xsk_clear_rx_need_wakeup(pool);
        }
 
        len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len;
@@ -1363,6 +1372,16 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info 
*vi, struct receive_queue
                        goto err;
        }
 
+       if (need_wakeup) {
+               if (rq->vq->num_free)
+                       /* We have free buffers, so we'd better wake up the
+                        * rx napi as soon as possible.
+                        */
+                       xsk_set_rx_need_wakeup(pool);
+               else
+                       xsk_clear_rx_need_wakeup(pool);
+       }
+
        return num;
 
 err:
@@ -1440,8 +1459,9 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, 
struct xsk_buff_pool *pool,
        struct virtnet_info *vi = sq->vq->vdev->priv;
        struct virtnet_sq_free_stats stats = {};
        struct net_device *dev = vi->dev;
+       int sent, vring_size;
+       bool need_wakeup;
        u64 kicks = 0;
-       int sent;
 
        /* Avoid to wakeup napi meanless, so call __free_old_xmit instead of
         * free_old_xmit().
@@ -1451,8 +1471,29 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, 
struct xsk_buff_pool *pool,
        if (stats.xsk)
                xsk_tx_completed(sq->xsk_pool, stats.xsk);
 
+       vring_size = virtqueue_get_vring_size(sq->vq);
+       need_wakeup = xsk_uses_need_wakeup(pool);
+       /* If the sq->vq is empty, and the tx ring is empty, and the user
+        * submit an entry to the tx ring after virtnet_xsk_xmit_batch() and
+        * before xsk_set_tx_need_wakeup(), we will lose the chance to wake
+        * up the tx napi, so we have to set the need_wakeup flag here.
+        */
+       if (need_wakeup && vring_size == sq->vq->num_free)
+               xsk_set_tx_need_wakeup(pool);
+
        sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
 
+       if (need_wakeup) {
+               if (vring_size == sq->vq->num_free)
+                       /* we can't wake up by ourself, and it should be done
+                        * by the user.
+                        */
+                       xsk_set_tx_need_wakeup(pool);
+               else
+                       /* we can wake up from skb_xmit_done() */
+                       xsk_clear_tx_need_wakeup(pool);
+       }
+
        if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
                check_sq_full_and_disable(vi, vi->dev, sq);
 
@@ -1470,9 +1511,6 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, 
struct xsk_buff_pool *pool,
        u64_stats_add(&sq->stats.xdp_tx,  sent);
        u64_stats_update_end(&sq->stats.syncp);
 
-       if (xsk_uses_need_wakeup(pool))
-               xsk_set_tx_need_wakeup(pool);
-
        return sent;
 }
 
-- 
2.54.0


Reply via email to