Currently, XDP_RING_NEED_WAKEUP is not properly supported by virtio-net. Take the tx path as an example: we call xsk_set_tx_need_wakeup() in virtnet_xsk_xmit(), but we never call xsk_clear_tx_need_wakeup() anywhere, which means the user ends up calling send() for every packet.
For the tx path, we now call xsk_set_tx_need_wakeup() after virtnet_xsk_xmit_batch() if sq->vq is empty, as we can't be woken up by skb_xmit_done() in this case. Otherwise, we clear the wakeup flag. For the rx path, we call xsk_set_rx_need_wakeup() if there are still free buffers in rq->vq. Race conditions are handled for both the rx and tx paths. Signed-off-by: Menglong Dong <[email protected]> --- drivers/net/virtio_net.c | 52 ++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9b3da9f9786c..25e895b849a6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1323,16 +1323,27 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue struct xsk_buff_pool *pool, gfp_t gfp) { struct xdp_buff **xsk_buffs; + bool need_wakeup; dma_addr_t addr; int err = 0; u32 len, i; int num; + need_wakeup = xsk_uses_need_wakeup(pool); xsk_buffs = rq->xsk_buffs; + /* If both rq->vq and fill ring are empty, and then the user submit + * all the chunks to the fill ring and check the wake up flag + * after xsk_buff_alloc_batch() and before xsk_set_rx_need_wakeup(), + * we will lose the chance to wake up the rx napi, so we have to + * set the need_wakeup flag here. + */ + if (need_wakeup && virtqueue_get_vring_size(rq->vq) == rq->vq->num_free) + xsk_set_rx_need_wakeup(pool); + num = xsk_buff_alloc_batch(pool, xsk_buffs, rq->vq->num_free); if (!num) { - if (xsk_uses_need_wakeup(pool)) { + if (need_wakeup) { xsk_set_rx_need_wakeup(pool); /* Return 0 instead of -ENOMEM so that NAPI is * descheduled. 
@@ -1341,8 +1352,6 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue } return -ENOMEM; - } else { - xsk_clear_rx_need_wakeup(pool); } len = xsk_pool_get_rx_frame_size(pool) + vi->hdr_len; @@ -1363,6 +1372,16 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue goto err; } + if (need_wakeup) { + if (rq->vq->num_free) + /* We have free buffers, so we'd better wake up the + * rx napi as soon as possible. + */ + xsk_set_rx_need_wakeup(pool); + else + xsk_clear_rx_need_wakeup(pool); + } + return num; err: @@ -1440,8 +1459,9 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, struct virtnet_info *vi = sq->vq->vdev->priv; struct virtnet_sq_free_stats stats = {}; struct net_device *dev = vi->dev; + int sent, vring_size; + bool need_wakeup; u64 kicks = 0; - int sent; /* Avoid to wakeup napi meanless, so call __free_old_xmit instead of * free_old_xmit(). @@ -1451,8 +1471,29 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, if (stats.xsk) xsk_tx_completed(sq->xsk_pool, stats.xsk); + vring_size = virtqueue_get_vring_size(sq->vq); + need_wakeup = xsk_uses_need_wakeup(pool); + /* If the sq->vq is empty, and the tx ring is empty, and the user + * submit an entry to the tx ring after virtnet_xsk_xmit_batch() and + * before xsk_set_tx_need_wakeup(), we will lose the chance to wake + * up the tx napi, so we have to set the need_wakeup flag here. + */ + if (need_wakeup && vring_size == sq->vq->num_free) + xsk_set_tx_need_wakeup(pool); + sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks); + if (need_wakeup) { + if (vring_size == sq->vq->num_free) + /* we can't wake up by ourself, and it should be done + * by the user. 
+ */ + xsk_set_tx_need_wakeup(pool); + else + /* we can wake up from skb_xmit_done() */ + xsk_clear_tx_need_wakeup(pool); + } + if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) check_sq_full_and_disable(vi, vi->dev, sq); @@ -1470,9 +1511,6 @@ static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, u64_stats_add(&sq->stats.xdp_tx, sent); u64_stats_update_end(&sq->stats.syncp); - if (xsk_uses_need_wakeup(pool)) - xsk_set_tx_need_wakeup(pool); - return sent; } -- 2.54.0

