From: Dor Laor <[EMAIL PROTECTED]> This patch prevents qemu handlers from reading the tap and instead it selects the tap descriptors for virtio devices. This eliminates copies and also batches guest notifications (interrupts).
Using this patch the rx performance reaches 800Mbps. -net user option remains as before and does not enjoy the performance improvement. Signed-off-by: Dor Laor <[EMAIL PROTECTED]> Signed-off-by: Avi Kivity <[EMAIL PROTECTED]> diff --git a/qemu/hw/pc.h b/qemu/hw/pc.h index 95471f3..5d4c747 100644 --- a/qemu/hw/pc.h +++ b/qemu/hw/pc.h @@ -145,7 +145,7 @@ void isa_ne2000_init(int base, qemu_irq irq, NICInfo *nd); /* virtio-net.c */ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn); - +void virtio_net_poll(void); /* virtio-blk.h */ void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c index f6f1f28..3940743 100644 --- a/qemu/hw/virtio-net.c +++ b/qemu/hw/virtio-net.c @@ -60,8 +60,13 @@ typedef struct VirtIONet VirtQueue *tx_vq; VLANClientState *vc; int can_receive; + int tap_fd; + struct VirtIONet *next; + int do_notify; } VirtIONet; +static VirtIONet *VirtIONetHead = NULL; + static VirtIONet *to_virtio_net(VirtIODevice *vdev) { return (VirtIONet *)vdev; @@ -96,6 +101,7 @@ static int virtio_net_can_receive(void *opaque) return (n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) && n->can_receive; } +/* -net user receive function */ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) { VirtIONet *n = opaque; @@ -134,6 +140,87 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) virtio_notify(&n->vdev, n->rx_vq); } +/* -net tap receive handler */ +void virtio_net_poll(void) +{ + VirtIONet *vnet; + int len; + fd_set rfds; + struct timeval tv; + int max_fd = -1; + VirtQueueElement elem; + struct virtio_net_hdr *hdr; + int did_notify; + + FD_ZERO(&rfds); + tv.tv_sec = 0; + tv.tv_usec = 0; + + while (1) { + + // Prepare the set of device to select from + for (vnet = VirtIONetHead; vnet; vnet = vnet->next) { + + if (vnet->tap_fd == -1) + continue; + + vnet->do_notify = 0; + //first check if the driver is ok + if (!virtio_net_can_receive(vnet)) + continue; +
+ /* FIXME: the drivers really need to set their status better */ + if (vnet->rx_vq->vring.avail == NULL) { + vnet->can_receive = 0; + continue; + } + + FD_SET(vnet->tap_fd, &rfds); + if (max_fd < vnet->tap_fd) max_fd = vnet->tap_fd; + } + + if (select(max_fd + 1, &rfds, NULL, NULL, &tv) <= 0) + break; + + // Now check who has data pending in the tap + for (vnet = VirtIONetHead; vnet; vnet = vnet->next) { + + if (!FD_ISSET(vnet->tap_fd, &rfds)) + continue; + + if (virtqueue_pop(vnet->rx_vq, &elem) == 0) { + vnet->can_receive = 0; + continue; + } + + hdr = (void *)elem.in_sg[0].iov_base; + hdr->flags = 0; + hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; +again: + len = readv(vnet->tap_fd, &elem.in_sg[1], elem.in_num - 1); + if (len == -1) { + if (errno == EINTR || errno == EAGAIN) + goto again; + else + fprintf(stderr, "reading network error %d", len); + } + virtqueue_push(vnet->rx_vq, &elem, sizeof(*hdr) + len); + vnet->do_notify = 1; + } + + /* signal other side */ + did_notify = 0; + for (vnet = VirtIONetHead; vnet; vnet = vnet->next) + if (vnet->do_notify) { + virtio_notify(&vnet->vdev, vnet->rx_vq); + did_notify++; + } + if (!did_notify) + break; + } + +} + /* TX */ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq) { @@ -175,7 +262,13 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn) n->can_receive = 0; memcpy(n->mac, nd->macaddr, 6); n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive, - virtio_net_can_receive, n); + virtio_net_can_receive, n); + n->tap_fd = hack_around_tap(n->vc->vlan->first_client); + if (n->tap_fd != -1) { + n->next = VirtIONetHead; + //push the device on top of the list + VirtIONetHead = n; + } return &n->vdev; } diff --git a/qemu/net.h b/qemu/net.h index 2dfff8d..c8ff6d6 100644 --- a/qemu/net.h +++ b/qemu/net.h @@ -34,6 +34,9 @@ void qemu_handler_true(void *opaque); void do_info_network(void); +/* virtio hack for zero copy receive */ +int hack_around_tap(void *opaque); + /* NIC info */ #define MAX_NICS 8 diff 
--git a/qemu/vl.c b/qemu/vl.c index 75517b6..c47b294 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -3886,8 +3886,15 @@ typedef struct TAPState { VLANClientState *vc; int fd; char down_script[1024]; + int no_poll; } TAPState; +static int tap_read_poll(void *opaque) +{ + TAPState *s = opaque; + return (!s->no_poll); +} + static void tap_receive(void *opaque, const uint8_t *buf, int size) { TAPState *s = opaque; @@ -3921,6 +3928,22 @@ static void tap_send(void *opaque) } } +int hack_around_tap(void *opaque) +{ + VLANClientState *vc = opaque; + TAPState *ts = vc->opaque; + + if (vc->fd_read != tap_receive) + return -1; + + if (ts) { + ts->no_poll = 1; + return ts->fd; + } + + return -1; +} + /* fd support */ static TAPState *net_tap_fd_init(VLANState *vlan, int fd) @@ -3931,9 +3954,10 @@ static TAPState *net_tap_fd_init(VLANState *vlan, int fd) if (!s) return NULL; s->fd = fd; + s->no_poll = 0; enable_sigio_timer(fd); s->vc = qemu_new_vlan_client(vlan, tap_receive, NULL, s); - qemu_set_fd_handler(s->fd, tap_send, NULL, s); + qemu_set_fd_handler2(s->fd, tap_read_poll, tap_send, NULL, s); snprintf(s->vc->info_str, sizeof(s->vc->info_str), "tap: fd=%d", fd); return s; } @@ -7742,6 +7766,8 @@ void main_loop_wait(int timeout) slirp_select_poll(&rfds, &wfds, &xfds); } #endif + virtio_net_poll(); + qemu_aio_poll(); if (vm_running) { ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-commits mailing list kvm-commits@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-commits