On Tue, Jul 8, 2025 at 8:48 AM Jason Wang <jasow...@redhat.com> wrote:
>
> This patch introduces basic in-order support for vhost-net. By
> recording the number of batched buffers in an array when calling
> `vhost_add_used_and_signal_n()`, we can reduce the number of userspace
> accesses. Note that the vhost-net batching logic is kept as we still
> count the number of buffers there.
>
> Testing Results:
>
> With testpmd:
>
> - TX: txonly mode + vhost_net with XDP_DROP on TAP shows a 17.5%
>   improvement, from 4.75 Mpps to 5.35 Mpps.
> - RX: No obvious improvements were observed.
>
> With virtio-ring in-order experimental code in the guest:
>
> - TX: pktgen in the guest + XDP_DROP on TAP shows a 19% improvement,
>   from 5.2 Mpps to 6.2 Mpps.
> - RX: pktgen on TAP with vhost_net + XDP_DROP in the guest achieves a
>   6.1% improvement, from 3.47 Mpps to 3.61 Mpps.
>
> Signed-off-by: Jason Wang <jasow...@redhat.com>
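One note for readers who have not looked at VIRTIO_F_IN_ORDER before:
the feature guarantees that buffers are used in the same order the
driver made them available, so the device can acknowledge a whole batch
with a single used-ring element instead of one element per buffer. That
is what the new vq->nheads[] array records on the vhost side. A toy
userspace model of the bookkeeping (illustrative only, not kernel code;
the names just loosely mirror drivers/vhost/net.c):

/* Toy model of the used-ring batching this patch adds; illustrative
 * only, not kernel code. Names loosely mirror drivers/vhost/net.c. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BATCH 64

struct vring_used_elem {
        uint32_t id;
        uint32_t len;
};

static struct vring_used_elem heads[BATCH];
static uint16_t nheads[BATCH];
static unsigned int done_idx;   /* packets completed since last flush */

/* Stand-in for vhost_add_used_and_signal_n(): every element pushed to
 * the used ring is one guest-visible write. */
static void add_used_n(const struct vring_used_elem *elems,
                       const uint16_t *counts, unsigned int nelems)
{
        for (unsigned int i = 0; i < nelems; i++)
                printf("used elem: id=%u len=%u covers %u buffer(s)\n",
                       (unsigned)elems[i].id, (unsigned)elems[i].len,
                       counts ? (unsigned)counts[i] : 1u);
}

static void flush(bool in_order)
{
        if (!done_idx)
                return;
        if (in_order) {
                /* One element covers the whole batch; nheads[0] says
                 * how many buffers it stands for. */
                nheads[0] = done_idx;
                add_used_n(heads, nheads, 1);
        } else {
                add_used_n(heads, NULL, done_idx);
        }
        done_idx = 0;
}

int main(void)
{
        bool in_order = true;

        for (uint32_t head = 0; head < 16; head++) {
                /* The in-order path only maintains slot 0, which ends
                 * up holding the id of the last completed buffer. */
                heads[in_order ? 0 : done_idx].id = head;
                heads[in_order ? 0 : done_idx].len = 0;
                done_idx++;
        }
        flush(in_order);        /* one used-ring write instead of 16 */
        return 0;
}

With 16 completed packets the in-order path does a single guest-visible
write where the out-of-order path does sixteen, which is presumably
where most of the TX improvement above comes from.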
Acked-by: Eugenio Pérez <epere...@redhat.com>

Thanks!

> ---
>  drivers/vhost/net.c | 86 ++++++++++++++++++++++++++++++++-------------
>  1 file changed, 61 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4f9c67f17b49..8ac994b3228a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -74,7 +74,8 @@ enum {
>          (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
>          (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
>          (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> -        (1ULL << VIRTIO_F_RING_RESET)
> +        (1ULL << VIRTIO_F_RING_RESET) |
> +        (1ULL << VIRTIO_F_IN_ORDER)
>  };
>
>  enum {
> @@ -450,7 +451,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
>          return vhost_poll_start(poll, sock->file);
>  }
>
> -static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
> +static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
> +                                  unsigned int count)
>  {
>          struct vhost_virtqueue *vq = &nvq->vq;
>          struct vhost_dev *dev = vq->dev;
> @@ -458,8 +460,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
>          if (!nvq->done_idx)
>                  return;
>
> -        vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> -                                    nvq->done_idx);
> +        vhost_add_used_and_signal_n(dev, vq, vq->heads,
> +                                    vq->nheads, count);
>          nvq->done_idx = 0;
>  }
>
> @@ -468,6 +470,8 @@ static void vhost_tx_batch(struct vhost_net *net,
>                             struct socket *sock,
>                             struct msghdr *msghdr)
>  {
> +        struct vhost_virtqueue *vq = &nvq->vq;
> +        bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>          struct tun_msg_ctl ctl = {
>                  .type = TUN_MSG_PTR,
>                  .num = nvq->batched_xdp,
> @@ -475,6 +479,11 @@ static void vhost_tx_batch(struct vhost_net *net,
>          };
>          int i, err;
>
> +        if (in_order) {
> +                vq->heads[0].len = 0;
> +                vq->nheads[0] = nvq->done_idx;
> +        }
> +
>          if (nvq->batched_xdp == 0)
>                  goto signal_used;
>
> @@ -496,7 +505,7 @@ static void vhost_tx_batch(struct vhost_net *net,
>          }
>
>  signal_used:
> -        vhost_net_signal_used(nvq);
> +        vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
>          nvq->batched_xdp = 0;
>  }
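One thing worth spelling out, since the two counters are easy to
conflate: after this change the count argument of
vhost_net_signal_used() is the number of used *elements* to publish,
while nvq->done_idx still counts completed packets. With in_order the
whole batch collapses into nheads[0] and exactly one element is
signalled. A small sketch of that contract (used_elems_for() is a name
I made up, it is not in the patch):

#include <stdbool.h>

static inline unsigned int used_elems_for(bool in_order,
                                          unsigned int done_idx)
{
        /* The TX flush passes `in_order ? 1 : nvq->done_idx`; the
         * constant 1 is safe even when nothing completed, because
         * vhost_net_signal_used() returns early when done_idx is 0. */
        return in_order ? (done_idx ? 1 : 0) : done_idx;
}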
> @@ -758,6 +767,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>          int sent_pkts = 0;
>          bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
>          bool busyloop_intr;
> +        bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>
>          do {
>                  busyloop_intr = false;
> @@ -794,11 +804,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>                                  break;
>                          }
>
> -                        /* We can't build XDP buff, go for single
> -                         * packet path but let's flush batched
> -                         * packets.
> -                         */
> -                        vhost_tx_batch(net, nvq, sock, &msg);
> +                        if (nvq->batched_xdp) {
> +                                /* We can't build XDP buff, go for single
> +                                 * packet path but let's flush batched
> +                                 * packets.
> +                                 */
> +                                vhost_tx_batch(net, nvq, sock, &msg);
> +                        }
>                          msg.msg_control = NULL;
>                  } else {
>                          if (tx_can_batch(vq, total_len))
> @@ -819,8 +831,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
>                  pr_debug("Truncated TX packet: len %d != %zd\n",
>                           err, len);
>  done:
> -                vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> -                vq->heads[nvq->done_idx].len = 0;
> +                if (in_order) {
> +                        vq->heads[0].id = cpu_to_vhost32(vq, head);
> +                } else {
> +                        vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq,
> +                                                                     head);
> +                        vq->heads[nvq->done_idx].len = 0;
> +                }
>                  ++nvq->done_idx;
>          } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
>
> @@ -999,7 +1015,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
>  }
>
>  static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
> -                                      bool *busyloop_intr)
> +                                      bool *busyloop_intr, unsigned int count)
>  {
>          struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
>          struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
> @@ -1009,7 +1025,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>
>          if (!len && rvq->busyloop_timeout) {
>                  /* Flush batched heads first */
> -                vhost_net_signal_used(rnvq);
> +                vhost_net_signal_used(rnvq, count);
>                  /* Both tx vq and rx socket were polled here */
>                  vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
>
> @@ -1021,7 +1037,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>
>  /* This is a multi-buffer version of vhost_get_desc, that works if
>   * vq has read descriptors only.
> - * @vq - the relevant virtqueue
> + * @nvq - the relevant vhost_net virtqueue
>   * @datalen - data length we'll be reading
>   * @iovcount - returned count of io vectors we fill
>   * @log - vhost log
> @@ -1029,14 +1045,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>   * @quota - headcount quota, 1 for big buffer
>   * returns number of buffer heads allocated, negative on error
>   */
> -static int get_rx_bufs(struct vhost_virtqueue *vq,
> +static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
>                         struct vring_used_elem *heads,
> +                       u16 *nheads,
>                         int datalen,
>                         unsigned *iovcount,
>                         struct vhost_log *log,
>                         unsigned *log_num,
>                         unsigned int quota)
>  {
> +        struct vhost_virtqueue *vq = &nvq->vq;
> +        bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>          unsigned int out, in;
>          int seg = 0;
>          int headcount = 0;
> @@ -1073,14 +1092,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>                          nlogs += *log_num;
>                          log += *log_num;
>                  }
> -                heads[headcount].id = cpu_to_vhost32(vq, d);
>                  len = iov_length(vq->iov + seg, in);
> -                heads[headcount].len = cpu_to_vhost32(vq, len);
> -                datalen -= len;
> +                if (!in_order) {
> +                        heads[headcount].id = cpu_to_vhost32(vq, d);
> +                        heads[headcount].len = cpu_to_vhost32(vq, len);
> +                }
>                  ++headcount;
> +                datalen -= len;
>                  seg += in;
>          }
> -        heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +
>          *iovcount = seg;
>          if (unlikely(log))
>                  *log_num = nlogs;
> @@ -1090,6 +1111,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>                  r = UIO_MAXIOV + 1;
>                  goto err;
>          }
> +
> +        if (!in_order)
> +                heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +        else {
> +                heads[0].len = cpu_to_vhost32(vq, len + datalen);
> +                heads[0].id = cpu_to_vhost32(vq, d);
> +                nheads[0] = headcount;
> +        }
> +
>          return headcount;
>  err:
>          vhost_discard_vq_desc(vq, headcount);
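The part that took me a second read is the RX accounting: with
in_order, get_rx_bufs() no longer fills one heads[] element per buffer.
It publishes a single element carrying the id of the last descriptor
and the used length of the last chain, and stores the number of covered
buffers in nheads[0]. A toy model of that bookkeeping (not the kernel
function; fake fixed-size descriptors with ids 0, 1, 2, ...):

/* Toy model of get_rx_bufs() head accounting; not the kernel function.
 * Fake descriptors are buf_len bytes each, with ids 0, 1, 2, ... */
#include <stdbool.h>
#include <stdint.h>

struct vring_used_elem {
        uint32_t id;
        uint32_t len;
};

static int rx_bufs(bool in_order, struct vring_used_elem *heads,
                   uint16_t *nheads, int datalen, int buf_len)
{
        int headcount = 0;
        uint32_t d = 0;

        while (datalen > 0) {
                d = headcount;          /* fake descriptor id */
                if (!in_order) {
                        heads[headcount].id = d;
                        heads[headcount].len = buf_len;
                }
                ++headcount;
                datalen -= buf_len;     /* ends <= 0 */
        }
        if (!in_order) {
                /* Trim the last element to the bytes actually used. */
                heads[headcount - 1].len = buf_len + datalen;
        } else {
                /* A single element: last id, the last buffer's used
                 * bytes, and the buffer count in nheads[0], matching
                 * what the patch stores. */
                heads[0].id = d;
                heads[0].len = buf_len + datalen;
                nheads[0] = headcount;
        }
        return headcount;
}

If I read the spec right, the ordering guarantee is what makes dropping
the per-buffer id/len writes safe here.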
> @@ -1102,6 +1132,8 @@ static void handle_rx(struct vhost_net *net)
>  {
>          struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
>          struct vhost_virtqueue *vq = &nvq->vq;
> +        bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> +        unsigned int count = 0;
>          unsigned in, log;
>          struct vhost_log *vq_log;
>          struct msghdr msg = {
> @@ -1149,12 +1181,13 @@ static void handle_rx(struct vhost_net *net)
>
>          do {
>                  sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
> -                                                      &busyloop_intr);
> +                                                      &busyloop_intr, count);
>                  if (!sock_len)
>                          break;
>                  sock_len += sock_hlen;
>                  vhost_len = sock_len + vhost_hlen;
> -                headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
> +                headcount = get_rx_bufs(nvq, vq->heads + count,
> +                                        vq->nheads + count,
>                                          vhost_len, &in, vq_log, &log,
>                                          likely(mergeable) ? UIO_MAXIOV : 1);
>                  /* On error, stop handling until the next kick. */
> @@ -1230,8 +1263,11 @@ static void handle_rx(struct vhost_net *net)
>                          goto out;
>                  }
>                  nvq->done_idx += headcount;
> -                if (nvq->done_idx > VHOST_NET_BATCH)
> -                        vhost_net_signal_used(nvq);
> +                count += in_order ? 1 : headcount;
> +                if (nvq->done_idx > VHOST_NET_BATCH) {
> +                        vhost_net_signal_used(nvq, count);
> +                        count = 0;
> +                }
>                  if (unlikely(vq_log))
>                          vhost_log_write(vq, vq_log, log, vhost_len,
>                                          vq->iov, in);
> @@ -1243,7 +1279,7 @@ static void handle_rx(struct vhost_net *net)
>          else if (!sock_len)
>                  vhost_net_enable_vq(net, vq);
>  out:
> -        vhost_net_signal_used(nvq);
> +        vhost_net_signal_used(nvq, count);
>          mutex_unlock(&vq->mutex);
>  }
>
> --
> 2.31.1
>
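For completeness, the guest-facing effect that the experimental
in-order driver code can exploit: one used element whose id is the last
completed head lets the driver retire every chain it submitted before
that one. My sketch of the idea (not the actual virtio-ring patches):

/* Conceptual driver-side completion under VIRTIO_F_IN_ORDER; my
 * sketch of the idea, not the experimental virtio-ring code. */
#include <stdint.h>
#include <stdio.h>

#define QSZ 256

static uint16_t pending[QSZ];   /* submitted head ids, oldest first */
static unsigned int pend_head, pend_tail;

/* One used element with id == last completed head retires every chain
 * submitted before it, because completion order equals submission
 * order under in-order. */
static void complete_up_to(uint16_t last_id)
{
        while (pend_head != pend_tail) {
                uint16_t id = pending[pend_head++ % QSZ];

                printf("completed chain %u\n", (unsigned)id);
                if (id == last_id)
                        break;
        }
}

int main(void)
{
        for (uint16_t i = 0; i < 4; i++)
                pending[pend_tail++ % QSZ] = i; /* submit chains 0..3 */
        complete_up_to(3);      /* one used element retires all four */
        return 0;
}

Nothing in the patch needs to change for any of this; the Ack above
stands.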