In the absence of a zero-copy API, we cannot touch guest physical memory
directly. This patch introduces a #define to control whether we touch the
buffers directly or bounce them through cpu_physical_memory_rw.
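
In rough outline (this is only a simplified sketch of the virtqueue_pop()
hunk further down, not additional code in the patch), the split looks like
this when a descriptor is mapped:

#ifdef VIRTIO_ZERO_COPY
    /* zero copy: the iovec points straight into guest RAM */
    sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
#else
    /* bounce: use a temporary buffer; guest-readable (out) buffers are
       copied in here, guest-writable (in) buffers are copied back to the
       guest in virtqueue_fill() once the device has filled them */
    sg->iov_base = qemu_malloc(sg->iov_len);
    if (sg->iov_base && !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE))
        cpu_physical_memory_read(vring_desc_addr(vq, i),
                                 sg->iov_base, sg->iov_len);
#endif
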
The #define is needed for merging virtio into upstream QEMU, since zero-copy
will not be accepted as-is. As a bonus, I also added physical memory dirtying
support to the zero-copy case, so live migration should now work when using
virtio.
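
Concretely, the dirtying is the loop added to virtqueue_fill() (quoted from
the diff below): every page of a guest-writable buffer that the device filled
through a zero-copy mapping is flagged so the migration dirty log picks it up:

if (size) {
    ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base;
    ram_addr_t off;

    for (off = 0; off < size; off += TARGET_PAGE_SIZE)
        cpu_physical_memory_set_dirty(addr + off);
}
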
With this patch, virtio is mergeable into upstream QEMU (except for
virtio-net, which is a different series).
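
For reviewers who have not seen the virtio code before, a device model drives
these helpers roughly as sketched here (the request parsing and the exact
return-value and used-ring bookkeeping are elided and purely illustrative;
virtqueue_pop() and virtqueue_fill() are the functions touched below):

VirtQueueElement elem;

while (virtqueue_pop(vq, &elem)) {
    /* read the request from elem.out_sg[0 .. out_num-1] ... */
    /* ... write 'len' bytes of reply into elem.in_sg[0 .. in_num-1] */
    virtqueue_fill(vq, &elem, len, 0);
}

With VIRTIO_ZERO_COPY unset, virtqueue_fill() is also where the bounce
buffers are copied back to guest memory (via the new elem.in_addr[] array)
and freed, so a device must not touch the iovecs after completing a request.
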
Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 1a58940..9396580 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -17,6 +17,8 @@
#include "virtio.h"
#include "sysemu.h"
+//#define VIRTIO_ZERO_COPY
+
/* from Linux's linux/virtio_pci.h */
/* A 32-bit r/o bitmask of the features supported by the host */
@@ -104,6 +106,7 @@ struct VirtQueue
#define VIRTIO_PCI_QUEUE_MAX 16
/* virt queue functions */
+#ifdef VIRTIO_ZERO_COPY
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
@@ -138,6 +141,7 @@ static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
    return phys_ram_base + off;
}
+#endif
static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa)
{
@@ -258,6 +262,38 @@ int virtio_queue_empty(VirtQueue *vq)
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
+    unsigned int offset;
+    int i;
+
+#ifndef VIRTIO_ZERO_COPY
+    for (i = 0; i < elem->out_num; i++)
+        qemu_free(elem->out_sg[i].iov_base);
+#endif
+
+    offset = 0;
+    for (i = 0; i < elem->in_num; i++) {
+        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
+
+#ifdef VIRTIO_ZERO_COPY
+        if (size) {
+            ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base -
+                phys_ram_base;
+            ram_addr_t off;
+
+            for (off = 0; off < size; off += TARGET_PAGE_SIZE)
+                cpu_physical_memory_set_dirty(addr + off);
+        }
+#else
+        if (size)
+            cpu_physical_memory_write(elem->in_addr[i],
+                                      elem->in_sg[i].iov_base,
+                                      size);
+
+        qemu_free(elem->in_sg[i].iov_base);
+#endif
+
+        offset += size;
+    }
+
    idx = (idx + vring_used_idx(vq)) % vq->vring.num;
    /* Get a pointer to the next entry in the used ring. */
@@ -372,14 +408,35 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
    do {
        struct iovec *sg;
-        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)
+        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
+            elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
            sg = &elem->in_sg[elem->in_num++];
-        else
+        } else
            sg = &elem->out_sg[elem->out_num++];
        /* Grab the first descriptor, and check it's OK. */
        sg->iov_len = vring_desc_len(vq, i);
+
+#ifdef VIRTIO_ZERO_COPY
        sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
+#else
+        /* Cap individual scatter element size to prevent unbounded
+           allocations of memory from the guest. Practically speaking, no
+           virtio driver will ever pass more than a page in each element.
+           We set the cap to be 2MB in case for some reason a large page
+           makes its way into the sg list. When we implement a zero copy
+           API, this limitation will disappear. */
+        if (sg->iov_len > (2 << 20))
+            sg->iov_len = 2 << 20;
+
+        sg->iov_base = qemu_malloc(sg->iov_len);
+        if (sg->iov_base &&
+            !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) {
+            cpu_physical_memory_read(vring_desc_addr(vq, i),
+                                     sg->iov_base,
+                                     sg->iov_len);
+        }
+#endif
        if (sg->iov_base == NULL)
            errx(1, "Invalid mapping\n");
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index 86dc36a..f04e75f 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -58,6 +58,7 @@ typedef struct VirtQueueElement
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
+    target_phys_addr_t in_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElement;