virtio is currently pretty sloppy about accessing guest memory directly. This is problematic both because phys_ram_base + PA is not always valid and because it requires that the host and guest both be of the same endianness.
This patch converts virtio to use the QEMU st/ld functions to manipulate the ring queues. I've tested both virtio-blk and virtio-net. I've also confirm that performance doesn't regress with virtio-net. We still use phys_ram_base to create the struct iovec to handle IO operations. I'm still trying to think of a way to eliminate that that doesn't involve an unnecessary copy or a full blown DMA API. This patch should make it easier to use virtio with QEMU (in the absence of KVM). Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c index 3d54c4e..e1b0aad 100644 --- a/qemu/hw/virtio-net.c +++ b/qemu/hw/virtio-net.c @@ -114,7 +114,7 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) int offset, i; /* FIXME: the drivers really need to set their status better */ - if (n->rx_vq->vring.avail == NULL) { + if (!virtio_ring_inited(n->rx_vq)) { n->can_receive = 0; return; } @@ -174,7 +174,7 @@ void virtio_net_poll(void) continue; /* FIXME: the drivers really need to set their status better */ - if (vnet->rx_vq->vring.avail == NULL) { + if (!virtio_ring_inited(vnet->rx_vq)) { vnet->can_receive = 0; continue; } @@ -257,8 +257,8 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq) VirtIONet *n = to_virtio_net(vdev); if (n->tx_timer_active && - (vq->vring.avail->idx - vq->last_avail_idx) == 64) { - vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; + virtio_ring_avail_size(vq) == 64) { + virtio_ring_set_used_notify(vq, 0); qemu_del_timer(n->tx_timer); n->tx_timer_active = 0; virtio_net_flush_tx(n, vq); @@ -266,7 +266,7 @@ static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq) qemu_mod_timer(n->tx_timer, qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); n->tx_timer_active = 1; - vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + virtio_ring_set_used_notify(vq, 1); } } @@ -280,7 +280,7 @@ static void virtio_net_tx_timer(void *opaque) if (!(n->vdev.status & 
VIRTIO_CONFIG_S_DRIVER_OK)) return; - n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; + virtio_ring_set_used_notify(n->tx_vq, 0); virtio_net_flush_tx(n, n->tx_vq); } diff --git a/qemu/hw/virtio.c b/qemu/hw/virtio.c index 9100bb1..5a905b1 100644 --- a/qemu/hw/virtio.c +++ b/qemu/hw/virtio.c @@ -17,6 +17,37 @@ #include "virtio.h" #include "sysemu.h" +/* from Linux's linux/virtio_ring.h */ + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 + +/* This means don't notify other side when buffer added. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* This means don't interrupt guest when buffer consumed. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +#define VIRTIO_PCI_QUEUE_MAX 16 + +typedef struct VRing +{ + unsigned int num; + target_phys_addr_t desc; + target_phys_addr_t avail; + target_phys_addr_t used; +} VRing; + +struct VirtQueue +{ + VRing vring; + uint32_t pfn; + uint16_t last_avail_idx; + void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); + int index; +}; + /* from Linux's linux/virtio_pci.h */ /* A 32-bit r/o bitmask of the features supported by the host */ @@ -58,11 +89,74 @@ /* virt queue functions */ -static void virtqueue_init(VirtQueue *vq, void *p) +static void virtqueue_init(VirtQueue *vq, target_phys_addr_t p) { vq->vring.desc = p; - vq->vring.avail = p + vq->vring.num * sizeof(VRingDesc); - vq->vring.used = (void *)TARGET_PAGE_ALIGN((unsigned long)&vq->vring.avail->ring[vq->vring.num]); + vq->vring.avail = p + vq->vring.num * 16; + vq->vring.used = vq->vring.avail + 2 * (2 + vq->vring.num); + vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.used); +} + +static uint64_t vring_desc_addr(VirtQueue *vq, unsigned int i) +{ + return ldq_phys(vq->vring.desc + i * 16 + 0); +} + +static uint32_t vring_desc_len(VirtQueue *vq, unsigned int i) +{ + return ldl_phys(vq->vring.desc + i * 16 + 8); +} + +static uint16_t 
vring_desc_flags(VirtQueue *vq, unsigned int i) +{ + return lduw_phys(vq->vring.desc + i * 16 + 12); +} + +static uint16_t vring_desc_next(VirtQueue *vq, unsigned int i) +{ + return lduw_phys(vq->vring.desc + i * 16 + 14); +} + +static uint16_t vring_avail_flags(VirtQueue *vq) +{ + return lduw_phys(vq->vring.avail + 0); +} + +static uint16_t vring_avail_idx(VirtQueue *vq) +{ + return lduw_phys(vq->vring.avail + 2); +} + +static uint16_t vring_avail_ring(VirtQueue *vq, unsigned int i) +{ + return lduw_phys(vq->vring.avail + 4 + i * 2); +} + +static void vring_used_set_flag(VirtQueue *vq, uint16_t flag) +{ + stw_phys(vq->vring.used + 0, lduw_phys(vq->vring.used + 0) | flag); +} + +static void vring_used_unset_flag(VirtQueue *vq, uint16_t flag) +{ + stw_phys(vq->vring.used + 0, lduw_phys(vq->vring.used + 0) & ~flag); +} + +static uint16_t vring_used_get_idx(VirtQueue *vq) +{ + return lduw_phys(vq->vring.used + 2); +} + +static void vring_used_set_idx(VirtQueue *vq, uint16_t value) +{ + stw_phys(vq->vring.used + 2, value); +} + +static void vring_used_set_ring(VirtQueue *vq, unsigned int i, + uint32_t id, uint32_t len) +{ + stl_phys(vq->vring.used + 4 + i * 8 + 0, id); + stl_phys(vq->vring.used + 4 + i * 8 + 4, len); } static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) @@ -70,11 +164,11 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) unsigned int next; /* If this descriptor says it doesn't chain, we're done. */ - if (!(vq->vring.desc[i].flags & VRING_DESC_F_NEXT)) + if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) return vq->vring.num; /* Check they're not leading us off end of descriptors. */ - next = vq->vring.desc[i].next; + next = vring_desc_next(vq, i); /* Make sure compiler knows to grab that: we don't want it changing! 
*/ wmb(); @@ -87,15 +181,12 @@ static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { - VRingUsedElem *used; + uint16_t idx; - /* Get a pointer to the next entry in the used ring. */ - used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; - used->id = elem->index; - used->len = len; - /* Make sure buffer is written before we update index. */ + idx = vring_used_get_idx(vq); + vring_used_set_ring(vq, idx % vq->vring.num, elem->index, len); wmb(); - vq->vring.used->idx++; + vring_used_set_idx(vq, idx + 1); } int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) @@ -104,17 +195,17 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) unsigned int position; /* Check it isn't doing very strange things with descriptor numbers. */ - if ((uint16_t)(vq->vring.avail->idx - vq->last_avail_idx) > vq->vring.num) + if ((uint16_t)(vring_avail_idx(vq) - vq->last_avail_idx) > vq->vring.num) errx(1, "Guest moved used index from %u to %u", - vq->last_avail_idx, vq->vring.avail->idx); + vq->last_avail_idx, vring_avail_idx(vq)); /* If there's nothing new since last we looked, return invalid. */ - if (vq->vring.avail->idx == vq->last_avail_idx) + if (vring_avail_idx(vq) == vq->last_avail_idx) return 0; /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - head = vq->vring.avail->ring[vq->last_avail_idx++ % vq->vring.num]; + head = vring_avail_ring(vq, vq->last_avail_idx++ % vq->vring.num); /* If their number is silly, that's a fatal mistake. 
*/ if (head >= vq->vring.num) @@ -127,17 +218,17 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) do { struct iovec *sg; - if ((vq->vring.desc[i].addr + vq->vring.desc[i].len) > ram_size) + if ((vring_desc_addr(vq, i) + vring_desc_len(vq, i)) > ram_size) errx(1, "Guest sent invalid pointer"); - if (vq->vring.desc[i].flags & VRING_DESC_F_WRITE) + if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) sg = &elem->in_sg[elem->in_num++]; else sg = &elem->out_sg[elem->out_num++]; /* Grab the first descriptor, and check it's OK. */ - sg->iov_len = vq->vring.desc[i].len; - sg->iov_base = phys_ram_base + vq->vring.desc[i].addr; + sg->iov_len = vring_desc_len(vq, i); + sg->iov_base = phys_ram_base + vring_desc_addr(vq, i); /* If we've got too many, that implies a descriptor loop. */ if ((elem->in_num + elem->out_num) > vq->vring.num) @@ -172,9 +263,9 @@ void virtio_reset(void *opaque) vdev->isr = 0; for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { - vdev->vq[i].vring.desc = NULL; - vdev->vq[i].vring.avail = NULL; - vdev->vq[i].vring.used = NULL; + vdev->vq[i].vring.desc = 0; + vdev->vq[i].vring.avail = 0; + vdev->vq[i].vring.used = 0; vdev->vq[i].last_avail_idx = 0; vdev->vq[i].pfn = 0; } @@ -199,7 +290,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) if (pa == 0) { virtio_reset(vdev); } else if (pa < (ram_size - TARGET_PAGE_SIZE)) { - virtqueue_init(&vdev->vq[vdev->queue_sel], phys_ram_base + pa); + virtqueue_init(&vdev->vq[vdev->queue_sel], pa); /* FIXME if pa == 0, deal with device tear down */ } break; @@ -386,14 +477,32 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) { /* Always notify when queue is empty */ - if (vq->vring.avail->idx != vq->last_avail_idx && - (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) + if (vring_avail_idx(vq) != vq->last_avail_idx && + (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) return; vdev->isr = 1; 
virtio_update_irq(vdev); } +void virtio_ring_set_used_notify(VirtQueue *vq, int enable) +{ + if (enable) + vring_used_set_flag(vq, VRING_USED_F_NO_NOTIFY); + else + vring_used_unset_flag(vq, VRING_USED_F_NO_NOTIFY); +} + +size_t virtio_ring_avail_size(VirtQueue *vq) +{ + return vring_avail_idx(vq) - vq->last_avail_idx; +} + +int virtio_ring_inited(VirtQueue *vq) +{ + return (vq->vring.avail != 0); +} + VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, uint16_t vendor, uint16_t device, uint16_t subvendor, uint16_t subdevice, @@ -413,7 +522,7 @@ VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, vdev->status = 0; vdev->isr = 0; vdev->queue_sel = 0; - memset(vdev->vq, 0, sizeof(vdev->vq)); + vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); config = pci_dev->config; config[0x00] = vendor & 0xFF; diff --git a/qemu/hw/virtio.h b/qemu/hw/virtio.h index dee97ba..8291bbd 100644 --- a/qemu/hw/virtio.h +++ b/qemu/hw/virtio.h @@ -30,66 +30,9 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 -/* from Linux's linux/virtio_ring.h */ - -/* This marks a buffer as continuing via the next field. */ -#define VRING_DESC_F_NEXT 1 -/* This marks a buffer as write-only (otherwise read-only). */ -#define VRING_DESC_F_WRITE 2 - -/* This means don't notify other side when buffer added. */ -#define VRING_USED_F_NO_NOTIFY 1 -/* This means don't interrupt guest when buffer consumed. 
*/ -#define VRING_AVAIL_F_NO_INTERRUPT 1 - typedef struct VirtQueue VirtQueue; typedef struct VirtIODevice VirtIODevice; -typedef struct VRingDesc -{ - uint64_t addr; - uint32_t len; - uint16_t flags; - uint16_t next; -} VRingDesc; - -typedef struct VRingAvail -{ - uint16_t flags; - uint16_t idx; - uint16_t ring[0]; -} VRingAvail; - -typedef struct VRingUsedElem -{ - uint32_t id; - uint32_t len; -} VRingUsedElem; - -typedef struct VRingUsed -{ - uint16_t flags; - uint16_t idx; - VRingUsedElem ring[0]; -} VRingUsed; - -typedef struct VRing -{ - unsigned int num; - VRingDesc *desc; - VRingAvail *avail; - VRingUsed *used; -} VRing; - -struct VirtQueue -{ - VRing vring; - uint32_t pfn; - uint16_t last_avail_idx; - void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); - int index; -}; - #define VIRTQUEUE_MAX_SIZE 1024 typedef struct VirtQueueElement @@ -101,8 +44,6 @@ typedef struct VirtQueueElement struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; } VirtQueueElement; -#define VIRTIO_PCI_QUEUE_MAX 16 - struct VirtIODevice { PCIDevice pci_dev; @@ -119,7 +60,7 @@ struct VirtIODevice uint32_t (*get_features)(VirtIODevice *vdev); void (*set_features)(VirtIODevice *vdev, uint32_t val); void (*update_config)(VirtIODevice *vdev, uint8_t *config); - VirtQueue vq[VIRTIO_PCI_QUEUE_MAX]; + VirtQueue *vq; }; VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, @@ -140,4 +81,10 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); +void virtio_ring_set_used_notify(VirtQueue *vq, int enable); + +size_t virtio_ring_avail_size(VirtQueue *vq); + +int virtio_ring_inited(VirtQueue *vq); + #endif ------------------------------------------------------------------------- Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. 
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel