aleksandr.fedorov_itglobal.com updated this revision to Diff 57577.
aleksandr.fedorov_itglobal.com added a comment.


  Fix incorrect usage of 'cur_rx_ring' on TX path.

CHANGES SINCE LAST UPDATE
  https://reviews.freebsd.org/D20276?vs=57439&id=57577

CHANGES SINCE LAST ACTION
  https://reviews.freebsd.org/D20276/new/

REVISION DETAIL
  https://reviews.freebsd.org/D20276

AFFECTED FILES
  usr.sbin/bhyve/pci_virtio_net.c

EMAIL PREFERENCES
  https://reviews.freebsd.org/settings/panel/emailpreferences/

To: aleksandr.fedorov_itglobal.com, #bhyve, jhb, rgrimes, krion, 
v.maffione_gmail.com
Cc: mizhka_gmail.com, novel, olevole_olevole.ru, freebsd-virtualization-list, 
evgueni.gavrilov_itglobal.com, bcran
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -73,6 +73,8 @@
 
 #define VTNET_MAXSEGS	256
 
+#define VTNET_MIN_AVAIL_DESC	64
+
 /*
  * Host capabilities.  Note that we only offer a few of these.
  */
@@ -392,85 +394,107 @@
 }
 
 static __inline int
-pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
 {
-	int r, i;
-	int len = 0;
+	char *buf;
+	int i;
+	int frag_size;
+	int iov_off;
+	int len;
+	int nm_off;
+	int nm_buf_size;
 
-	for (r = nmd->cur_tx_ring; ; ) {
-		struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
-		uint32_t cur, idx;
-		char *buf;
+	struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, nmd->cur_tx_ring);
 
-		if (nm_ring_empty(ring)) {
-			r++;
-			if (r > nmd->last_tx_ring)
-				r = nmd->first_tx_ring;
-			if (r == nmd->cur_tx_ring)
-				break;
-			continue;
+	if ((nm_ring_space(ring) * ring->nr_buf_size) < iovsize) {
+		/*
+		 * No more avail space in TX ring, try to flush it.
+		 */
+		ioctl(nmd->fd, NIOCTXSYNC, NULL);
+		return (0);
+	}
+
+	i = ring->cur;
+	buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+	iov_off = 0;
+	len = iovsize;
+	nm_buf_size = ring->nr_buf_size;
+	nm_off = 0;
+
+	while (iovsize) {
+
+		if (unlikely(iov_off == iov->iov_len)) {
+			iov++;
+			iov_off = 0;
 		}
-		cur = ring->cur;
-		idx = ring->slot[cur].buf_idx;
-		buf = NETMAP_BUF(ring, idx);
 
-		for (i = 0; i < iovcnt; i++) {
-			if (len + iov[i].iov_len > 2048)
-				break;
-			memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
-			len += iov[i].iov_len;
+		if (unlikely(nm_off == nm_buf_size)) {
+			ring->slot[i].flags = NS_MOREFRAG;
+			i = nm_ring_next(ring, i);
+			buf = NETMAP_BUF(ring, ring->slot[i].buf_idx);
+			nm_off = 0;
 		}
-		ring->slot[cur].len = len;
-		ring->head = ring->cur = nm_ring_next(ring, cur);
-		nmd->cur_tx_ring = r;
-		ioctl(nmd->fd, NIOCTXSYNC, NULL);
-		break;
+
+		frag_size = MIN(nm_buf_size - nm_off, iov->iov_len - iov_off);
+		memcpy(buf + nm_off, iov->iov_base + iov_off, frag_size);
+
+		iovsize -= frag_size;
+		iov_off += frag_size;
+		nm_off += frag_size;
+
+		ring->slot[i].len = nm_off;
 	}
 
+	/* The last slot must not have NS_MOREFRAG set. */
+	ring->slot[i].flags &= ~NS_MOREFRAG;
+	ring->head = ring->cur = nm_ring_next(ring, i);
+	ioctl(nmd->fd, NIOCTXSYNC, NULL);
+
 	return (len);
 }
 
 static __inline int
-pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt, int iovsize)
 {
-	int len = 0;
-	int i = 0;
-	int r;
+	char *buf;
+	int i;
+	int iov_off;
+	int frag_size;
+	int len;
+	int nm_off;
 
-	for (r = nmd->cur_rx_ring; ; ) {
-		struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
-		uint32_t cur, idx;
-		char *buf;
-		size_t left;
+	struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
 
-		if (nm_ring_empty(ring)) {
-			r++;
-			if (r > nmd->last_rx_ring)
-				r = nmd->first_rx_ring;
-			if (r == nmd->cur_rx_ring)
-				break;
-			continue;
+	i = r->cur;
+	buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+	iov_off = 0;
+	nm_off = 0;
+	len = iovsize;
+
+	while (iovsize) {
+
+		if (unlikely(iov_off == iov->iov_len)) {
+			iov++;
+			iov_off = 0;
 		}
-		cur = ring->cur;
-		idx = ring->slot[cur].buf_idx;
-		buf = NETMAP_BUF(ring, idx);
-		left = ring->slot[cur].len;
 
-		for (i = 0; i < iovcnt && left > 0; i++) {
-			if (iov[i].iov_len > left)
-				iov[i].iov_len = left;
-			memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
-			len += iov[i].iov_len;
-			left -= iov[i].iov_len;
+		if (unlikely(nm_off == r->slot[i].len)) {
+			i = nm_ring_next(r, i);
+			buf = NETMAP_BUF(r, r->slot[i].buf_idx);
+			nm_off = 0;
 		}
-		ring->head = ring->cur = nm_ring_next(ring, cur);
-		nmd->cur_rx_ring = r;
-		ioctl(nmd->fd, NIOCRXSYNC, NULL);
-		break;
+
+		frag_size = MIN(r->slot[i].len - nm_off, iov->iov_len - iov_off);
+		memcpy(iov->iov_base + iov_off, buf + nm_off, frag_size);
+
+		iovsize -= frag_size;
+		iov_off += frag_size;
+		nm_off += frag_size;
 	}
-	for (; i < iovcnt; i++)
-		iov[i].iov_len = 0;
 
+	r->head = r->cur = nm_ring_next(r, i);
+	ioctl(nmd->fd, NIOCRXSYNC, NULL);
+
 	return (len);
 }
 
@@ -481,32 +505,102 @@
 pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
 		    int len)
 {
-	static char pad[60]; /* all zero bytes */
-
 	if (sc->vsc_nmd == NULL)
 		return;
 
-	/*
-	 * If the length is < 60, pad out to that and add the
-	 * extra zero'd segment to the iov. It is guaranteed that
-	 * there is always an extra iov available by the caller.
-	 */
-	if (len < 60) {
-		iov[iovcnt].iov_base = pad;
-		iov[iovcnt].iov_len = 60 - len;
-		iovcnt++;
+	(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt, len);
+}
+
+static __inline int
+vq_avail_to_iovec(struct vqueue_info *vq, struct iovec *iov, int len, int start,
+		int minavail)
+{
+	int idx;
+	uint16_t mask = vq->vq_qsize - 1;
+	volatile struct virtio_desc *vdir;
+	struct vmctx *ctx = vq->vq_vs->vs_pi->pi_vmctx;
+
+	uint16_t ndesc = (uint16_t)(vq->vq_avail->va_idx - vq->vq_last_avail - start);
+
+	if (ndesc < minavail)
+		return (0);
+
+	int off = 0;
+	int uidx = vq->vq_used->vu_idx + start;
+
+	for (int i = 0; i < ndesc; i++) {
+		idx = vq->vq_avail->va_ring[(vq->vq_last_avail + i +  start) & mask];
+		vdir = &vq->vq_desc[idx];
+
+		iov[i].iov_base = paddr_guest2host(ctx, 
+				vdir->vd_addr, vdir->vd_len);
+		iov[i].iov_len = vdir->vd_len;
+
+		off += vdir->vd_len;
+
+		vq->vq_used->vu_ring[uidx & mask].vu_idx = idx;
+		vq->vq_used->vu_ring[uidx & mask].vu_tlen = 
+			(off >= len) ? vdir->vd_len - (off - len) : vdir->vd_len;
+
+		uidx++;
+
+		if (off >= len) {
+			return (i + 1);
+		}
 	}
-	(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+
+	return (0);
 }
 
+static __inline void
+vq_inc_used_idx_and_last_avail(struct vqueue_info *vq, int n)
+{
+	if (n > 0) {
+		vq->vq_last_avail += n;
+		vq->vq_used->vu_idx += n;
+	}
+}
+
+static __inline int
+netmap_next_pkt_len(struct nm_desc *nmd)
+{
+	int i;
+	int len;
+	struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+	len = 0;
+
+	for (i = r->cur; i != r->tail; i = nm_ring_next(r, i)) {
+		len += r->slot[i].len;
+		if (!(r->slot[i].flags & NS_MOREFRAG))
+			break;
+	}
+
+	return (len);
+}
+
+static __inline void
+netmap_drop_pkt(struct nm_desc *nmd)
+{
+	int i;
+	struct netmap_ring *r = NETMAP_RXRING(nmd->nifp, nmd->cur_rx_ring);
+
+	for (i = r->cur; i != r->tail; i = nm_ring_next(r, i)) {
+		if (!(r->slot[i].flags & NS_MOREFRAG)) {
+			r->head = r->cur = nm_ring_next(r, i);
+			return;
+		}
+	}
+}
+
 static void
 pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
 {
-	struct iovec iov[VTNET_MAXSEGS], *riov;
+	struct iovec iov[VTNET_RINGSZ], *riov;
 	struct vqueue_info *vq;
-	void *vrx;
-	int len, n;
-	uint16_t idx;
+	int len;
+	int n;
+	int used;
 
 	/*
 	 * Should never be called without a valid netmap descriptor
@@ -517,11 +611,11 @@
 	 * But, will be called when the rx ring hasn't yet
 	 * been set up or the guest is resetting the device.
 	 */
-	if (!sc->vsc_rx_ready || sc->resetting) {
+	if (unlikely((!sc->vsc_rx_ready || sc->resetting))) {
 		/*
 		 * Drop the packet and try later.
 		 */
-		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		netmap_drop_pkt(sc->vsc_nmd);
 		return;
 	}
 
@@ -529,63 +623,54 @@
 	 * Check for available rx buffers
 	 */
 	vq = &sc->vsc_queues[VTNET_RXQ];
-	if (!vq_has_descs(vq)) {
+	if (unlikely(!vq_has_descs(vq))) {
 		/*
 		 * Drop the packet and try later.  Interrupt on
 		 * empty, if that's negotiated.
 		 */
-		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		netmap_drop_pkt(sc->vsc_nmd);
 		vq_endchains(vq, 1);
 		return;
 	}
 
+	used = 0;
+
 	do {
-		/*
-		 * Get descriptor chain.
-		 */
-		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
-		assert(n >= 1 && n <= VTNET_MAXSEGS);
+		len = netmap_next_pkt_len(sc->vsc_nmd);
 
-		/*
-		 * Get a pointer to the rx header, and use the
-		 * data immediately following it for the packet buffer.
-		 */
-		vrx = iov[0].iov_base;
-		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+		if (unlikely(len == 0)) {
+			vq_inc_used_idx_and_last_avail(vq, used);
+			vq_endchains(vq, 0);
+			return;
+		}
 
-		len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+		n = vq_avail_to_iovec(vq, iov, len + sc->rx_vhdrlen, used,
+				VTNET_MIN_AVAIL_DESC);
 
-		if (len == 0) {
-			/*
-			 * No more packets, but still some avail ring
-			 * entries.  Interrupt if needed/appropriate.
-			 */
-			vq_retchain(vq);
+		if (unlikely(n == 0)) {
+			vq_inc_used_idx_and_last_avail(vq, used);
 			vq_endchains(vq, 0);
 			return;
 		}
 
-		/*
-		 * The only valid field in the rx packet header is the
-		 * number of buffers if merged rx bufs were negotiated.
-		 */
-		memset(vrx, 0, sc->rx_vhdrlen);
-
 		if (sc->rx_merge) {
-			struct virtio_net_rxhdr *vrxh;
-
-			vrxh = vrx;
-			vrxh->vrh_bufs = 1;
+			struct virtio_net_rxhdr *vrxh = iov[0].iov_base;
+			memset(vrxh, 0, sc->rx_vhdrlen);
+			vrxh->vrh_bufs = n;
 		}
 
-		/*
-		 * Release this chain and handle more chains.
-		 */
-		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+		(void)pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n, len);
+
+		used += n;
+
 	} while (vq_has_descs(vq));
 
-	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+	vq_inc_used_idx_and_last_avail(vq, used);
 	vq_endchains(vq, 1);
+
+	return;
 }
 
 static void

_______________________________________________
freebsd-virtualization@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
"freebsd-virtualization-unsubscribe@freebsd.org"

Reply via email to