Where a VLAN consists only of a single tap interface on the
host side and a single virtio interface on the guest side,
we can use the tun/tap driver's vringfd support to eliminate
copies.

We set up a vringfd for both rings and pass them to the
tun/tap driver. When the guest informs us of the location of
the ring pages, we use the VRINGSETINFO ioctl() to inform
the /dev/vring driver.

On xmit, we write() to the vringfd to notify tun/tap of new
buffers. Once the buffers have been sent, the vringfd
becomes readable and we read() from it, causing the buffers
to be placed back on the used ring. We can then notify the
guest that that we're done with them.

On the recv side, the guest makes buffers available via the
ring and we merely poll() the vringfd for notification of
tun/tap having buffers available. When the vringfd becomes
readable, we read() from it to make tun/tap copy its buffers
into the guest buffers and, finally, we notify the guest.

Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
 qemu/hw/virtio-net.c |   78 ++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/virtio.c     |   58 +++++++++++++++++++++++++++++++++++++
 qemu/hw/virtio.h     |    4 ++
 qemu/net.h           |    4 ++
 qemu/vl.c            |   32 ++++++++++++++++++++
 5 files changed, 176 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 6c42bf0..f3ff356 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -15,6 +15,7 @@
 #include "net.h"
 #include "qemu-timer.h"
 #include "qemu-kvm.h"
+#include "qemu-char.h"
 
 /* from Linux's virtio_net.h */
 
@@ -156,6 +157,12 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue 
*vq)
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
         return;
 
+    if (vq->vringfd > 0) {
+       if (write(vq->vringfd, "", 0) != 0)
+           fprintf(stderr, "Failed to notify host kernel of xmit buffers\n");
+       return;
+    }
+
     while (virtqueue_pop(vq, &elem)) {
        ssize_t len = 0;
 
@@ -205,6 +212,65 @@ static void virtio_net_tx_timer(void *opaque)
     virtio_net_flush_tx(n, n->tx_vq);
 }
 
+static void virtio_net_rx_used(void *opaque)
+{
+    VirtIONet *n = opaque;
+
+    if (read(n->rx_vq->vringfd, NULL, 0) != 0) {
+       fprintf(stderr, "Failed to pull used rx buffers\n");
+       return;
+    }
+
+    virtio_notify(&n->vdev, n->rx_vq);
+}
+
+static void virtio_net_tx_used(void *opaque)
+{
+    VirtIONet *n = opaque;
+
+    if (read(n->tx_vq->vringfd, NULL, 0) != 0) {
+       fprintf(stderr, "Failed to pull used tx buffers\n");
+       return;
+    }
+
+    virtio_notify(&n->vdev, n->tx_vq);
+}
+
+static void virtio_net_close_vrings(VirtIONet *n)
+{
+    if (n->rx_vq->vringfd > 0) {
+       qemu_set_fd_handler(n->rx_vq->vringfd, NULL, NULL, NULL);
+       virtqueue_close_vringfd(n->rx_vq);
+    }
+    if (n->tx_vq->vringfd > 0) {
+       qemu_set_fd_handler(n->tx_vq->vringfd, NULL, NULL, NULL);
+       virtqueue_close_vringfd(n->tx_vq);
+    }
+}
+
+static void virtio_net_init_vrings(VirtIONet *n)
+{
+    struct VLANState *vlan = n->vc->vlan;
+    VLANClientState *host = vlan->first_client;
+
+    if (vlan->nb_host_devs != 1 || vlan->nb_guest_devs != 1)
+       return;
+
+    if (!host->set_vring_fds)
+       return;
+
+    if (!virtqueue_open_vringfd(n->rx_vq) || 
!virtqueue_open_vringfd(n->tx_vq)) {
+       virtio_net_close_vrings(n);
+       return;
+    }
+
+    qemu_set_fd_handler(n->rx_vq->vringfd, virtio_net_rx_used, NULL, n);
+    qemu_set_fd_handler(n->tx_vq->vringfd, virtio_net_tx_used, NULL, n);
+
+    if (!host->set_vring_fds(host, n->rx_vq->vringfd, n->tx_vq->vringfd))
+       virtio_net_close_vrings(n);
+}
+
 static void virtio_net_save(QEMUFile *f, void *opaque)
 {
     VirtIONet *n = opaque;
@@ -224,6 +290,16 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int 
version_id)
 
     virtio_load(&n->vdev, f);
 
+    if (n->rx_vq->vringfd > 0 && n->tx_vq->vringfd > 0) {
+       VLANClientState *host = n->vc->vlan->first_client;
+
+       qemu_set_fd_handler(n->rx_vq->vringfd, virtio_net_rx_used, NULL, n);
+       qemu_set_fd_handler(n->tx_vq->vringfd, virtio_net_tx_used, NULL, n);
+
+       if (!host->set_vring_fds(host, n->rx_vq->vringfd, n->tx_vq->vringfd))
+           virtio_net_close_vrings(n);
+    }
+
     qemu_get_buffer(f, n->mac, 6);
     n->tx_timer_active = qemu_get_be32(f);
 
@@ -258,6 +334,8 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int 
devfn)
     n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
     n->tx_timer_active = 0;
 
+    virtio_net_init_vrings(n);
+
     register_savevm("virtio-net", virtio_net_id++, 1,
                    virtio_net_save, virtio_net_load, n);
 
diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index a1ee93f..e9ec1ad 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -13,6 +13,7 @@
 
 #include <inttypes.h>
 #include <err.h>
+#include <sys/ioctl.h>
 
 #include "virtio.h"
 #include "sysemu.h"
@@ -193,6 +194,56 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
     return elem->in_num + elem->out_num;
 }
 
+/* vringfd functions */
+
+static int virtqueue_setup_vringfd(VirtQueue *vq)
+{
+    struct vring_ioctl_info info;
+
+    if (vq->vringfd <= 0 || !vq->vring.desc)
+       return 1;
+
+    info.num_descs = vq->vring.num;
+    info.descs    = (unsigned long)vq->vring.desc;
+    info.base     = (unsigned long)phys_ram_base;
+    info.limit    = phys_ram_size;
+
+    if (ioctl(vq->vringfd, VRINGSETINFO, &info) != 0) {
+       fprintf(stderr, "Failed to setup vring: %s\n", strerror(errno));
+       return 0;
+    }
+
+    return 1;
+}
+
+int virtqueue_open_vringfd(VirtQueue *vq)
+{
+    int fd;
+
+    fd = open("/dev/vring", O_RDWR);
+    if (fd < 0) {
+       static int warned = 0;
+       if (!warned)
+           fprintf(stderr, "Warning: no vringfd support available: "
+                   "Failed to open /dev/vring\n");
+       warned = 1;
+       return 0;
+    }
+
+    vq->vringfd = fd;
+
+    return virtqueue_setup_vringfd(vq);
+}
+
+void virtqueue_close_vringfd(VirtQueue *vq)
+{
+    if (vq->vringfd <= 0)
+       return;
+
+    close(vq->vringfd);
+    vq->vringfd = 0;
+}
+
 /* virtio device */
 
 static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
@@ -248,6 +299,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
            size_t size = virtqueue_size(vdev->vq[vdev->queue_sel].vring.num);
            virtqueue_init(&vdev->vq[vdev->queue_sel],
                           virtio_map_gpa(pa, size));
+           virtqueue_setup_vringfd(&vdev->vq[vdev->queue_sel]);
        }
        break;
     case VIRTIO_PCI_QUEUE_SEL:
@@ -464,6 +516,8 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, i);
 
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+       qemu_put_byte(f, vdev->vq[i].vringfd > 0);
+
        if (vdev->vq[i].vring.num == 0)
            break;
 
@@ -490,6 +544,9 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
+       if (qemu_get_byte(f))
+           virtqueue_open_vringfd(&vdev->vq[i]);
+
        vdev->vq[i].vring.num = qemu_get_be32(f);
        qemu_get_be32s(f, &vdev->vq[i].pfn);
        vq_last_avail(&vdev->vq[i]) = qemu_get_be16(f);
@@ -501,6 +558,7 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
            pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS;
            size = virtqueue_size(vdev->vq[i].vring.num);
            virtqueue_init(&vdev->vq[i], virtio_map_gpa(pa, size));
+           virtqueue_setup_vringfd(&vdev->vq[i]);
        }
     }
 
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index 142ecbd..d3cf5fb 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -93,6 +93,7 @@ struct VirtQueue
     VRing vring;
     uint32_t pfn;
     int inuse;
+    int vringfd;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -152,4 +153,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
+int virtqueue_open_vringfd(VirtQueue *vq);
+void virtqueue_close_vringfd(VirtQueue *vq);
+
 #endif
diff --git a/qemu/net.h b/qemu/net.h
index 9dc8b7c..e8ed926 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -9,12 +9,15 @@ typedef ssize_t (IOReadvHandler)(void *, const struct iovec 
*, int);
 
 typedef struct VLANClientState VLANClientState;
 
+typedef int (SetVringFDs)(VLANClientState *, int, int);
+
 struct VLANClientState {
     IOReadHandler *fd_read;
     IOReadvHandler *fd_readv;
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code.  */
     IOCanRWHandler *fd_can_read;
+    SetVringFDs *set_vring_fds;
     void *opaque;
     struct VLANClientState *next;
     struct VLANState *vlan;
@@ -26,6 +29,7 @@ struct VLANState {
     VLANClientState *first_client;
     struct VLANState *next;
     unsigned int nb_guest_devs, nb_host_devs;
+    unsigned int hotplug_disabled : 1;
 };
 
 VLANState *qemu_find_vlan(int id);
diff --git a/qemu/vl.c b/qemu/vl.c
index f573dce..d043ccd 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -4166,6 +4166,31 @@ static void tap_send(void *opaque)
     } while (s->size > 0);
 }
 
+#ifdef TUNSETRECVVRING
+static int tap_set_vring_fds(VLANClientState *vc, int recv, int xmit)
+{
+    TAPState *s = vc->opaque;
+
+    if (ioctl(s->fd, TUNSETRECVVRING, recv) != 0) {
+       fprintf(stderr, "TUNSETRECVVRING ioctl() failed: %s\n",
+               strerror(errno));
+       return 0;
+    }
+
+    if (ioctl(s->fd, TUNSETXMITVRING, xmit) != 0) {
+       fprintf(stderr, "TUNSETXMITVRING ioctl() failed: %s\n",
+               strerror(errno));
+       exit(1);
+    }
+
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+
+    vc->vlan->hotplug_disabled = 1;
+
+    return 1;
+}
+#endif
+
 /* fd support */
 
 static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
@@ -4178,6 +4203,9 @@ static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
     s->fd = fd;
     s->vc = qemu_new_vlan_client(vlan, tap_receive, NULL, s);
     s->vc->fd_readv = tap_readv;
+#ifdef TUNSETRECVVRING
+    s->vc->set_vring_fds = tap_set_vring_fds;
+#endif
     qemu_set_fd_handler2(s->fd, tap_can_send, tap_send, NULL, s);
     snprintf(s->vc->info_str, sizeof(s->vc->info_str), "tap: fd=%d", fd);
     return s;
@@ -4974,6 +5002,10 @@ int net_client_init(const char *str)
         fprintf(stderr, "Could not create vlan %d\n", vlan_id);
         return -1;
     }
+    if (vlan->hotplug_disabled) {
+        fprintf(stderr, "Hotplug disabled on vlan %d\n", vlan_id);
+        return -1;
+    }
     if (!strcmp(device, "nic")) {
         NICInfo *nd;
         uint8_t *macaddr;
-- 
1.5.4.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to