In the absence of a zero-copy API, we cannot touch guest physical memory
directly. This patch introduces a #define to control whether we touch the
buffers directly or bounce them through cpu_physical_memory_rw.
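
In rough outline (this is only a simplified sketch of the virtqueue_pop()
hunk further down, not additional code in the patch), the split looks like
this when a descriptor is mapped:

#ifdef VIRTIO_ZERO_COPY
    /* zero copy: the iovec points straight into guest RAM */
    sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
#else
    /* bounce: use a temporary buffer; guest-readable (out) buffers are
       copied in here, guest-writable (in) buffers are copied back to the
       guest in virtqueue_fill() once the device has filled them */
    sg->iov_base = qemu_malloc(sg->iov_len);
    if (sg->iov_base && !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE))
        cpu_physical_memory_read(vring_desc_addr(vq, i),
                                 sg->iov_base, sg->iov_len);
#endif
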
The #define is needed for merging virtio into upstream QEMU, since zero-copy
will not be accepted as-is. As a bonus, I also added physical memory dirtying
support to the zero-copy case, so live migration should now work when using
virtio.
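
Concretely, the dirtying is the loop added to virtqueue_fill() (quoted from
the diff below): every page of a guest-writable buffer that the device filled
through a zero-copy mapping is flagged so the migration dirty log picks it up:

if (size) {
    ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base;
    ram_addr_t off;

    for (off = 0; off < size; off += TARGET_PAGE_SIZE)
        cpu_physical_memory_set_dirty(addr + off);
}
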
With this patch, virtio is mergeable into upstream QEMU (except for
virtio-net, which is a different series).
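
For reviewers who have not seen the virtio code before, a device model drives
these helpers roughly as sketched here (the request parsing and the exact
return-value and used-ring bookkeeping are elided and purely illustrative;
virtqueue_pop() and virtqueue_fill() are the functions touched below):

VirtQueueElement elem;

while (virtqueue_pop(vq, &elem)) {
    /* read the request from elem.out_sg[0 .. out_num-1] ... */
    /* ... write 'len' bytes of reply into elem.in_sg[0 .. in_num-1] */
    virtqueue_fill(vq, &elem, len, 0);
}

With VIRTIO_ZERO_COPY unset, virtqueue_fill() is also where the bounce
buffers are copied back to guest memory (via the new elem.in_addr[] array)
and freed, so a device must not touch the iovecs after completing a request.
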
Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c
index 1a58940..9396580 100644
--- a/qemu/hw/virtio.c
+++ b/qemu/hw/virtio.c
@@ -17,6 +17,8 @@
#include "virtio.h"
#include "sysemu.h"
+//#define VIRTIO_ZERO_COPY
+
/* from Linux's linux/virtio_pci.h */
/* A 32-bit r/o bitmask of the features supported by the host */
@@ -104,6 +106,7 @@ struct VirtQueue
#define VIRTIO_PCI_QUEUE_MAX 16
/* virt queue functions */
+#ifdef VIRTIO_ZERO_COPY
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
@@ -138,6 +141,7 @@ static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
    return phys_ram_base + off;
}
+#endif
static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa)
{
@@ -258,6 +262,38 @@ int virtio_queue_empty(VirtQueue *vq)
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
+    unsigned int offset;
+    int i;
+
+#ifndef VIRTIO_ZERO_COPY
+    for (i = 0; i < elem->out_num; i++)
+        qemu_free(elem->out_sg[i].iov_base);
+#endif
+
+    offset = 0;
+    for (i = 0; i < elem->in_num; i++) {
+        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
+
+#ifdef VIRTIO_ZERO_COPY
+        if (size) {
+            ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base -
+                phys_ram_base;
+            ram_addr_t off;
+
+            for (off = 0; off < size; off += TARGET_PAGE_SIZE)
+                cpu_physical_memory_set_dirty(addr + off);
+        }
+#else
+        if (size)
+            cpu_physical_memory_write(elem->in_addr[i],
+                                      elem->in_sg[i].iov_base,
+                                      size);
+
+        qemu_free(elem->in_sg[i].iov_base);
+#endif
+
+        offset += size;
+    }
+
    idx = (idx + vring_used_idx(vq)) % vq->vring.num;
    /* Get a pointer to the next entry in the used ring. */
@@ -372,14 +408,35 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
    do {
        struct iovec *sg;
-        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)
+        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
+            elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
            sg = &elem->in_sg[elem->in_num++];
-        else
+        } else
            sg = &elem->out_sg[elem->out_num++];
        /* Grab the first descriptor, and check it's OK. */
        sg->iov_len = vring_desc_len(vq, i);
+
+#ifdef VIRTIO_ZERO_COPY
        sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
+#else
+        /* Cap individual scatter element size to prevent unbounded
+           allocations of memory from the guest. Practically speaking, no
+           virtio driver will ever pass more than a page in each element.
+           We set the cap to be 2MB in case for some reason a large page
+           makes its way into the sg list. When we implement a zero copy
+           API, this limitation will disappear. */
+        if (sg->iov_len > (2 << 20))
+            sg->iov_len = 2 << 20;
+
+        sg->iov_base = qemu_malloc(sg->iov_len);
+        if (sg->iov_base &&
+            !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) {
+            cpu_physical_memory_read(vring_desc_addr(vq, i),
+                                     sg->iov_base,
+                                     sg->iov_len);
+        }
+#endif
        if (sg->iov_base == NULL)
            errx(1, "Invalid mapping\n");
diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h
index 86dc36a..f04e75f 100644
--- a/qemu/hw/virtio.h
+++ b/qemu/hw/virtio.h
@@ -58,6 +58,7 @@ typedef struct VirtQueueElement
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
+    target_phys_addr_t in_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElement;