The virtio-vhost-user device lets a guest act as a vhost-user device backend. It works by tunneling vhost-user protocol messages into the guest.
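In a typical deployment the UNIX socket passed to this device is shared with a vhost-user frontend running in another VM. The frontend-side command line below is only an illustrative sketch (the ids, the virtio-net frontend, and the socket path are assumptions, not part of this patch; the frontend VM additionally needs its guest RAM in a share=on memory backend, omitted here):

-chardev socket,id=chardev1,path=vhost-user.sock \
-netdev vhost-user,id=netdev0,chardev=chardev1 \
-device virtio-net-pci,netdev=netdev0

The new device syntax is as follows: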
-chardev socket,id=chardev0,path=vhost-user.sock,server=on,wait=off \ -device virtio-vhost-user-pci,chardev=chardev0 The VIRTIO device specification is here: https://uarif1.github.io/vvu/v2/virtio-v1.1-cs01 For more information about virtio-vhost-user, see https://wiki.qemu.org/Features/VirtioVhostUser. Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> Signed-off-by: Nikos Dragazis <ndraga...@arrikto.com> Signed-off-by: Usama Arif <usama.a...@bytedance.com> --- hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 2 + hw/virtio/trace-events | 26 + hw/virtio/virtio-pci.c | 13 +- hw/virtio/virtio-vhost-user-pci.c | 471 ++++++++ hw/virtio/virtio-vhost-user.c | 1066 +++++++++++++++++++ hw/virtio/virtio.c | 7 +- include/hw/pci/pci.h | 1 + include/hw/virtio/virtio-pci.h | 7 + include/hw/virtio/virtio-vhost-user.h | 126 +++ include/hw/virtio/virtio.h | 2 + include/standard-headers/linux/virtio_ids.h | 1 + 12 files changed, 1717 insertions(+), 10 deletions(-) create mode 100644 hw/virtio/virtio-vhost-user-pci.c create mode 100644 hw/virtio/virtio-vhost-user.c create mode 100644 include/hw/virtio/virtio-vhost-user.h diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index e9ecae1f50..813bfc4600 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -80,3 +80,8 @@ config VHOST_USER_FS bool default y depends on VIRTIO && VHOST_USER + +config VIRTIO_VHOST_USER + bool + default y + depends on VIRTIO && VHOST_USER \ No newline at end of file diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 7e8877fd64..11ed09d5ff 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -29,6 +29,7 @@ virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +virtio_ss.add(when: 'CONFIG_VIRTIO_VHOST_USER', if_true: files('virtio-vhost-user.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) @@ -54,6 +55,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VIRTIO_VHOST_USER', if_true: files('virtio-vhost-user-pci.c')) virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index ab8e095b73..9dccf723ad 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -85,6 +85,32 @@ virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d ac virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d" virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: 0x%"PRIx64" num_pages: %d" +# hw/virtio/virtio-vhost-user.c +virtio_vhost_user_m2s_bad_version(void *s, unsigned int version) "s %p version %u" +virtio_vhost_user_m2s_unexpected_reply(void *s) "s %p" +virtio_vhost_user_m2s_bad_payload_size(void *s, unsigned int size) "s %p size %u" +virtio_vhost_user_m2s_bad_request(void *s, unsigned request) "s %p request %u" +virtio_vhost_user_m2s_request(void *s, unsigned int request) "s %p request %u" 
+virtio_vhost_user_m2s_unknown_request(void *s, unsigned int request) "s %p request %u" +virtio_vhost_user_s2m_bad_version(void *s, unsigned int version) "s %p version %u" +virtio_vhost_user_s2m_expected_reply(void *s) "s %p" +virtio_vhost_user_s2m_bad_payload_size(void *s, unsigned int size) "s %p size %u" +virtio_vhost_user_s2m_bad_request(void *s, unsigned request) "s %p request %u" +virtio_vhost_user_s2m_request(void *s, unsigned int request) "s %p request %u" +virtio_vhost_user_s2m_unknown_request(void *s, unsigned int request) "s %p request %u" +virtio_vhost_user_rxq_empty(void *s) "s %p" +virtio_vhost_user_tx_done(void *s) "s %p" +virtio_vhost_user_chr_event(void *s, int event) "s %p event %d" +virtio_vhost_user_chr_change(void *s) "s %p" +virtio_vhost_user_conn_state_transition(void *s, int old_state, int event, int new_state) "s %p old_state %d event %d new_state %d" +virtio_vhost_user_set_config(void *s, unsigned int old_status, unsigned int new_status) "s %p old_status %u new_status %u" +virtio_vhost_user_doorbell_write(void *s, unsigned int vq_idx, ssize_t nwritten) "s %p vq_idx %u nwritten %zd" +virtio_vhost_user_notification_read(void *s, uint64_t addr, uint64_t return_val) "s %p addr 0x%"PRIx64" return_val 0x%"PRIx64 +virtio_vhost_user_notification_write(void *s, uint64_t addr, uint64_t val) "s %p addr 0x%"PRIx64" val 0x%"PRIx64 +virtio_vhost_user_guest_notifier_read(int kickfd, uint16_t vector) "kickfd %d vector 0x%"PRIx16 +virtio_vhost_user_memory_region(void *s, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint64_t mmap_offset, void *mmap_addr) "s %p guest_phys_addr 0x%"PRIx64" memory_size 0x%"PRIx64" userspace_addr 0x%"PRIx64" mmap_offset 0x%"PRIx64" mmap_addr %p" + + # virtio-mmio.c virtio_mmio_read(uint64_t offset) "virtio_mmio_read offset 0x%" PRIx64 virtio_mmio_write_offset(uint64_t offset, uint64_t value) "virtio_mmio_write offset 0x%" PRIx64 " value 0x%" PRIx64 diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 0566ad7d00..9ad5c56388 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -18,7 +18,6 @@ #include "qemu/osdep.h" #include "exec/memop.h" -#include "standard-headers/linux/virtio_pci.h" #include "hw/boards.h" #include "hw/virtio/virtio.h" #include "migration/qemu-file-types.h" @@ -222,7 +221,7 @@ static bool virtio_pci_ioeventfd_enabled(DeviceState *d) #define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 -static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) +inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) { return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ? QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4; @@ -1558,11 +1557,11 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy, proxy->notify_pio.size); } -static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, - VirtIOPCIRegion *region, - struct virtio_pci_cap *cap, - MemoryRegion *mr, - uint8_t bar) +void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, + VirtIOPCIRegion *region, + struct virtio_pci_cap *cap, + MemoryRegion *mr, + uint8_t bar) { memory_region_add_subregion(mr, region->offset, ®ion->mr); diff --git a/hw/virtio/virtio-vhost-user-pci.c b/hw/virtio/virtio-vhost-user-pci.c new file mode 100644 index 0000000000..b4e0ba735b --- /dev/null +++ b/hw/virtio/virtio-vhost-user-pci.c @@ -0,0 +1,471 @@ +/* + * Virtio Vhost-user Device + * + * Copyright (C) 2017-2018 Red Hat, Inc. 
+ * + * Authors: + * Stefan Hajnoczi <stefa...@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-vhost-user.h" +#include "qapi/error.h" +#include "trace.h" +#include "hw/pci/msix.h" + +typedef struct VirtIOVhostUserPCI VirtIOVhostUserPCI; + +/* + * virtio-vhost-user-pci: This extends VirtioPCIProxy. + */ + +#define TYPE_VIRTIO_VHOST_USER_PCI "virtio-vhost-user-pci-base" +#define VIRTIO_VHOST_USER_PCI(obj) OBJECT_CHECK(VirtIOVhostUserPCI, \ + (obj), TYPE_VIRTIO_VHOST_USER_PCI) +#define VIRTIO_VHOST_USER_PCI_GET_CLASS(obj) \ + OBJECT_GET_CLASS(VirtioVhostUserPCIClass, obj, \ + TYPE_VIRTIO_VHOST_USER_PCI) +#define VIRTIO_VHOST_USER_PCI_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtioVhostUserPCIClass, klass, \ + TYPE_VIRTIO_VHOST_USER_PCI) + +struct VirtIOVhostUserPCI { + VirtIOPCIProxy parent_obj; + VirtIOVhostUser vdev; + + MemoryRegion additional_resources_bar; + + VirtIOPCIRegion doorbells; + VirtIOPCIRegion notifications; + VirtIOPCIRegion shared_memory; +}; + +typedef struct VirtioVhostUserPCIClass { + VirtioPCIClass parent_class; + + void (*set_vhost_mem_regions)(VirtIOVhostUserPCI *vvup); + void (*delete_vhost_mem_region)(VirtIOVhostUserPCI *vvup, MemoryRegion *mr); + void (*cleanup_bar)(VirtIOVhostUserPCI *vvup); + void (*register_doorbell)(VirtIOVhostUserPCI *vvup, EventNotifier *e, + uint8_t vq_idx); + void (*unregister_doorbell)(VirtIOVhostUserPCI *vvup, EventNotifier *e, + uint8_t vq_idx); +} VirtioVhostUserPCIClass; + +static Property virtio_vhost_user_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + +/* + * Handler for the frontend kickfd notifications. Inject an INTx or MSI-X + * interrupt to the guest in response to the frontend notification. Use the + * appropriate vector in the latter case. + */ +void virtio_vhost_user_guest_notifier_read(EventNotifier *n) +{ + struct kickfd *kickfd = container_of(n, struct kickfd, guest_notifier); + VirtIODevice *vdev = kickfd->vdev; + VirtIOVhostUser *vvu = container_of(vdev, struct VirtIOVhostUser, + parent_obj); + VirtIOVhostUserPCI *vvup = container_of(vvu, struct VirtIOVhostUserPCI, + vdev); + VirtIOPCIProxy *proxy = &vvup->parent_obj; + PCIDevice *pci_dev = &proxy->pci_dev; + + if (event_notifier_test_and_clear(n)) { + /* + * The ISR status register is used only for INTx interrupts. Thus, we + * use it only in this case. + */ + if (!msix_enabled(pci_dev)) { + virtio_set_isr(vdev, 0x2); + } + /* + * Send an interrupt, either with INTx or MSI-X mechanism. msix_notify() + * already handles the case where the MSI-X vector is NO_VECTOR by not + * issuing interrupts. Thus, we don't have to check this case here. 
+ */ + virtio_notify_vector(vdev, kickfd->msi_vector); + + trace_virtio_vhost_user_guest_notifier_read(kickfd->guest_notifier.rfd, + kickfd->msi_vector); + } +} + +static uint64_t virtio_vhost_user_doorbells_read(void *opaque, hwaddr addr, + unsigned size) +{ + return 0; +} + +static void virtio_vhost_user_doorbells_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + VirtIOVhostUserPCI *vvup = opaque; + VirtIOPCIProxy *proxy = &vvup->parent_obj; + VirtIOVhostUser *s = &vvup->vdev; + unsigned idx = addr / virtio_pci_queue_mem_mult(proxy); + + if (idx < VIRTIO_QUEUE_MAX) { + /* We shouldn't reach at this point since we are using ioeventfds. */ + if (event_notifier_get_fd(&s->callfds[idx]) >= 0) { + ssize_t nwritten; + + nwritten = event_notifier_set(&s->callfds[idx]); + trace_virtio_vhost_user_doorbell_write(s, idx, nwritten); + + } + } else if (idx == VIRTIO_QUEUE_MAX) { + /* TODO log doorbell */ + } +} + +static void vvu_register_doorbell(VirtIOVhostUserPCI *vvup, EventNotifier *e, + uint8_t vq_idx) +{ + VirtIOPCIProxy *proxy = &vvup->parent_obj; + hwaddr addr = vq_idx * virtio_pci_queue_mem_mult(proxy); + + /* Register the callfd EventNotifier as ioeventfd */ + memory_region_add_eventfd(&vvup->doorbells.mr, addr, 2, false, vq_idx, e); +} + +void virtio_vhost_user_register_doorbell(VirtIOVhostUser *s, EventNotifier *e, + uint8_t vq_idx) +{ + VirtIOVhostUserPCI *vvup = container_of(s, struct VirtIOVhostUserPCI, vdev); + VirtioVhostUserPCIClass *vvup_class = VIRTIO_VHOST_USER_PCI_GET_CLASS(vvup); + + vvup_class->register_doorbell(vvup, e, vq_idx); +} + +static void vvu_unregister_doorbell(VirtIOVhostUserPCI *vvup, EventNotifier *e, + uint8_t vq_idx) +{ + VirtIOPCIProxy *proxy = &vvup->parent_obj; + hwaddr addr = vq_idx * virtio_pci_queue_mem_mult(proxy); + + /* Unregister the callfd EventNotifier */ + memory_region_del_eventfd(&vvup->doorbells.mr, addr, 2, false, vq_idx, e); +} + +void virtio_vhost_user_unregister_doorbell(VirtIOVhostUser *s, EventNotifier *e, + uint8_t vq_idx) +{ + VirtIOVhostUserPCI *vvup = container_of(s, struct VirtIOVhostUserPCI, vdev); + VirtioVhostUserPCIClass *vvup_class = VIRTIO_VHOST_USER_PCI_GET_CLASS(vvup); + + vvup_class->unregister_doorbell(vvup, e, vq_idx); +} + +static uint64_t virtio_vhost_user_notification_read(void *opaque, hwaddr addr, + unsigned size) +{ + VirtIOVhostUserPCI *vvup = opaque; + VirtIOVhostUser *s = &vvup->vdev; + uint64_t val = 0; + + switch (addr) { + case NOTIFICATION_SELECT: + val = s->nselect; + break; + case NOTIFICATION_MSIX_VECTOR: + if (s->nselect < ARRAY_SIZE(s->kickfds)) { + val = s->kickfds[s->nselect].msi_vector; + } + break; + default: + break; + } + + trace_virtio_vhost_user_notification_read(s, addr, val); + + return val; +} + +/* Set the MSI vectors for the frontend virtqueue notifications. */ +static void virtio_vhost_user_notification_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + /* + * MMIO regions are byte-addressable. The value of the `addr` argument is + * relative to the starting address of the MMIO region. For example, + * `addr = 6` means that the 6th byte of this MMIO region has been written. 
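+ * For instance, a write at offset NOTIFICATION_MSIX_VECTOR (2) updates the
+ * MSI-X vector of the queue previously chosen via NOTIFICATION_SELECT (0).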
+ */ + VirtIOVhostUserPCI *vvup = opaque; + VirtIOPCIProxy *proxy = &vvup->parent_obj; + VirtIOVhostUser *s = &vvup->vdev; + + switch (addr) { + case NOTIFICATION_SELECT: + if (val < VIRTIO_QUEUE_MAX) { + s->nselect = val; + } + break; + case NOTIFICATION_MSIX_VECTOR: + msix_vector_unuse(&proxy->pci_dev, s->kickfds[s->nselect].msi_vector); + if (msix_vector_use(&proxy->pci_dev, val) < 0) { + val = VIRTIO_NO_VECTOR; + } + s->kickfds[s->nselect].msi_vector = val; + break; + default: + break; + } + + trace_virtio_vhost_user_notification_write(s, addr, val); +} + +/* + * Add the shared memory region as a subregion of the + * additional_resources_bar. + */ +static void vvu_set_vhost_mem_regions(VirtIOVhostUserPCI *vvup) +{ + VirtIOVhostUser *s = &vvup->vdev; + VhostUserMemory m = s->read_msg.payload.memory, *memory = &m; + hwaddr subregion_offset; + uint32_t i; + + /* Start after the notification structure */ + subregion_offset = vvup->shared_memory.offset; + + for (i = 0; i < memory->nregions; i++) { + VirtIOVhostUserMemTableRegion *region = &s->mem_table[i]; + + memory_region_init_ram_device_ptr(®ion->mr, OBJECT(vvup), + "virtio-vhost-user-mem-table-region", + region->total_size, region->mmap_addr); + memory_region_add_subregion(&vvup->additional_resources_bar, + subregion_offset, ®ion->mr); + + subregion_offset += region->total_size; + } +} + +void virtio_vhost_user_set_vhost_mem_regions(VirtIOVhostUser *s) +{ + VirtIOVhostUserPCI *vvup = container_of(s, struct VirtIOVhostUserPCI, vdev); + VirtioVhostUserPCIClass *vvup_class = VIRTIO_VHOST_USER_PCI_GET_CLASS(vvup); + + vvup_class->set_vhost_mem_regions(vvup); +} + +static void vvu_delete_vhost_mem_region(VirtIOVhostUserPCI *vvup, + MemoryRegion *mr) +{ + memory_region_del_subregion(&vvup->additional_resources_bar, mr); + object_unparent(OBJECT(mr)); +} + + +void virtio_vhost_user_delete_vhost_mem_region(VirtIOVhostUser *s, + MemoryRegion *mr) +{ + VirtIOVhostUserPCI *vvup = container_of(s, struct VirtIOVhostUserPCI, vdev); + VirtioVhostUserPCIClass *vvup_class = VIRTIO_VHOST_USER_PCI_GET_CLASS(vvup); + + vvup_class->delete_vhost_mem_region(vvup, mr); +} + +static void virtio_vhost_user_init_bar(VirtIOVhostUserPCI *vvup) +{ + /* virtio-pci doesn't use BAR 2 & 3, so we use it */ + const int bar_index = 2; + + /* + * TODO If the BAR is too large the guest won't have address space to map + * it! + */ + const uint64_t bar_size = 1ULL << 36; + + memory_region_init(&vvup->additional_resources_bar, OBJECT(vvup), + "virtio-vhost-user", bar_size); + pci_register_bar(&vvup->parent_obj.pci_dev, bar_index, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_PREFETCH | + PCI_BASE_ADDRESS_MEM_TYPE_64, + &vvup->additional_resources_bar); + + /* + * Initialize the VirtIOPCIRegions for the virtio configuration structures + * corresponding to the additional device resource capabilities. + * Place the additional device resources in the additional_resources_bar. 
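+ * Layout: the doorbell region starts at offset 0, the notification
+ * structure follows it, and the shared memory region is placed 4KiB-aligned
+ * after that.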
+ */ + VirtIOPCIProxy *proxy = VIRTIO_PCI(vvup); + + vvup->doorbells.offset = 0x0; + /* VIRTIO_QUEUE_MAX + 1 for logfd */ + vvup->doorbells.size = virtio_pci_queue_mem_mult(proxy) + * (VIRTIO_QUEUE_MAX + 1); + /* TODO Not sure if it is necessary for the size to be aligned */ + vvup->doorbells.size = QEMU_ALIGN_UP(vvup->doorbells.size, 4096); + vvup->doorbells.type = VIRTIO_PCI_CAP_DOORBELL_CFG; + + vvup->notifications.offset = vvup->doorbells.offset + vvup->doorbells.size; + vvup->notifications.size = 0x1000; + vvup->notifications.type = VIRTIO_PCI_CAP_NOTIFICATION_CFG; + + /* cap.offset and cap.length must be 4096-byte (0x1000) aligned. */ + vvup->shared_memory.offset = vvup->notifications.offset + + vvup->notifications.size; + vvup->shared_memory.offset = QEMU_ALIGN_UP(vvup->shared_memory.offset, + 4096); + /* TODO Reconsider the shared memory cap.length later */ + /* + * The size of the shared memory region in the additional resources BAR + * doesn't fit into the length field (uint32_t) of the virtio capability + * structure. However, we don't need to pass this information to the guest + * driver via the shared memory capability because the guest can figure out + * the length of the vhost memory regions from the SET_MEM_TABLE vhost-user + * messages. Therefore, the size of the shared memory region that we are + * declaring here has no meaning and the guest driver shouldn't rely on + * this. + */ + vvup->shared_memory.size = 0x1000; + vvup->shared_memory.type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG; + + /* + * Initialize the MMIO MemoryRegions for the additional device + *resources. + */ + + const struct MemoryRegionOps doorbell_ops = { + .read = virtio_vhost_user_doorbells_read, + .write = virtio_vhost_user_doorbells_write, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, + .endianness = DEVICE_LITTLE_ENDIAN, + }; + + const struct MemoryRegionOps notification_ops = { + .read = virtio_vhost_user_notification_read, + .write = virtio_vhost_user_notification_write, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, + .endianness = DEVICE_LITTLE_ENDIAN, + }; + + memory_region_init_io(&vvup->doorbells.mr, OBJECT(vvup), + &doorbell_ops, vvup, "virtio-vhost-user-doorbell-cfg", + vvup->doorbells.size); + + memory_region_init_io(&vvup->notifications.mr, OBJECT(vvup), + ¬ification_ops, vvup, + "virtio-vhost-user-notification-cfg", + vvup->notifications.size); + + /* + * Register the virtio PCI configuration structures + * for the additional device resources. This involves + * registering the corresponding MemoryRegions as + * subregions of the additional_resources_bar and creating + * virtio capabilities. 
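+ * The doorbell capability also carries doorbell_off_multiplier, so the
+ * driver can locate each queue's doorbell at vq_idx * multiplier within
+ * the doorbell region.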
+ */ + struct virtio_pci_cap cap = { + .cap_len = sizeof cap, + }; + struct virtio_pci_doorbell_cap doorbell = { + .cap.cap_len = sizeof doorbell, + .doorbell_off_multiplier = + cpu_to_le32(virtio_pci_queue_mem_mult(proxy)), + }; + virtio_pci_modern_region_map(proxy, &vvup->doorbells, &doorbell.cap, + &vvup->additional_resources_bar, bar_index); + virtio_pci_modern_region_map(proxy, &vvup->notifications, &cap, + &vvup->additional_resources_bar, bar_index); + virtio_pci_modern_region_map(proxy, &vvup->shared_memory, &cap, + &vvup->additional_resources_bar, bar_index); +} + +static void vvu_cleanup_bar(VirtIOVhostUserPCI *vvup) +{ + memory_region_del_subregion(&vvup->additional_resources_bar, + &vvup->doorbells.mr); + memory_region_del_subregion(&vvup->additional_resources_bar, + &vvup->notifications.mr); +} + +void virtio_vhost_user_cleanup_additional_resources(VirtIOVhostUser *s) +{ + VirtIOVhostUserPCI *vvup = container_of(s, struct VirtIOVhostUserPCI, vdev); + VirtioVhostUserPCIClass *vvup_class = VIRTIO_VHOST_USER_PCI_GET_CLASS(vvup); + + vvup_class->cleanup_bar(vvup); +} + +static void virtio_vhost_user_pci_realize(VirtIOPCIProxy *vpci_dev, + Error **errp) +{ + VirtIOVhostUserPCI *vvup = VIRTIO_VHOST_USER_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&vvup->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = VIRTIO_QUEUE_MAX + 3; + } + + virtio_vhost_user_init_bar(vvup); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus), &error_abort); + object_property_set_bool(OBJECT(vdev), "realized", true, errp); +} + +static void virtio_vhost_user_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + VirtioVhostUserPCIClass *vvup_class = VIRTIO_VHOST_USER_PCI_CLASS(klass); + + device_class_set_props(dc, virtio_vhost_user_pci_properties); + k->realize = virtio_vhost_user_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VHOST_USER; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_OTHERS; + + vvup_class->set_vhost_mem_regions = vvu_set_vhost_mem_regions; + vvup_class->delete_vhost_mem_region = vvu_delete_vhost_mem_region; + vvup_class->cleanup_bar = vvu_cleanup_bar; + vvup_class->register_doorbell = vvu_register_doorbell; + vvup_class->unregister_doorbell = vvu_unregister_doorbell; +} + +static void virtio_vhost_user_pci_initfn(Object *obj) +{ + VirtIOVhostUserPCI *dev = VIRTIO_VHOST_USER_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_VHOST_USER); +} + +static const VirtioPCIDeviceTypeInfo virtio_vhost_user_pci_info = { + .base_name = TYPE_VIRTIO_VHOST_USER_PCI, + .generic_name = "virtio-vhost-user-pci", + .instance_size = sizeof(VirtIOVhostUserPCI), + .instance_init = virtio_vhost_user_pci_initfn, + .class_size = sizeof(VirtioVhostUserPCIClass), + .class_init = virtio_vhost_user_pci_class_init, +}; + +static void virtio_vhost_user_pci_register_types(void) +{ + virtio_pci_types_register(&virtio_vhost_user_pci_info); +} + +type_init(virtio_vhost_user_pci_register_types); diff --git a/hw/virtio/virtio-vhost-user.c b/hw/virtio/virtio-vhost-user.c new file mode 100644 index 0000000000..6e4c6ec11c --- /dev/null +++ b/hw/virtio/virtio-vhost-user.c @@ -0,0 +1,1066 @@ +/* + * Virtio Vhost-user Device + * + * Copyright (C) 2017-2018 Red Hat, Inc. 
+ * + * Authors: + * Stefan Hajnoczi <stefa...@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_ids.h" +#include "qapi/error.h" +#include "qemu/iov.h" +#include "qemu/sockets.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/virtio-vhost-user.h" +#include "trace.h" +#include "qemu/uuid.h" + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, + VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, + VHOST_USER_PROTOCOL_F_CONFIG = 9, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, + /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */ + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_MAX +}; + +/* vmstate migration version number */ +#define VIRTIO_VHOST_USER_VM_VERSION 0 + +/* + * Descriptor ring size. Only one vhost-user protocol message is processed at + * a time but later messages can be queued. + */ +#define VIRTIO_VHOST_USER_VIRTQUEUE_SIZE 128 + +/* Protocol features that have been implemented */ +#define SUPPORTED_VHOST_USER_FEATURES \ + (VHOST_USER_PROTOCOL_F_MQ | VHOST_USER_PROTOCOL_F_REPLY_ACK) + +/* + * Connection state machine + * + * The vhost-user frontend might not always be connected and the driver might + * not always be ready either. The device interface has a way to manage + * connection establishment: + * + * The driver indicates readiness with the VIRTIO_VHOST_USER_STATUS_BACKEND_UP + * status bit. The device then begins establishing a connection with the + * vhost-user frontend. The VIRTIO_VHOST_USER_STATUS_FRONTEND_UP status bit is + * set when connected. + * + * The driver may decide it wants to disconnect at any time. Vhost-user + * protocol violations and other errors might cause the device to give up on + * the connection too. + * + * This state machine captures all transitions in one place. This way the + * connection management code isn't sprinkled around many locations. 
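+ * For example, once the driver has set BACKEND_UP and the chardev then
+ * reports CHR_OPENED, the table below runs conn_action_connect() and the
+ * state becomes CONN_STATE_CONNECTED.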
+ */ +typedef enum { + CONN_STATE_UNDEFINED, + CONN_STATE_INITIAL, /* !BACKEND_UP + !CHR_OPENED */ + CONN_STATE_BACKEND_UP, /* BACKEND_UP + !CHR_OPENED */ + CONN_STATE_CHR_OPENED, /* !BACKEND_UP + CHR_OPENED */ + CONN_STATE_CONNECTED, /* BACKEND_UP + CHR_OPENED */ + CONN_STATE_MAX +} ConnectionState; + +typedef enum { + /* Driver sets VIRTIO_VHOST_USER_STATUS_BACKEND_UP */ + CONN_EVENT_BACKEND_UP, + + /* Driver clears VIRTIO_VHOST_USER_STATUS_BACKEND_UP */ + CONN_EVENT_BACKEND_DOWN, + + /* Socket connected and also each time we update chardev handlers */ + CONN_EVENT_CHR_OPENED, + + /* Socket disconnected */ + CONN_EVENT_CHR_CLOSED, + + /* Socket chardev was replaced */ + CONN_EVENT_CHR_CHANGE, + + /* Socket I/O error */ + CONN_EVENT_SOCKET_ERROR, + + /* Virtio device reset */ + CONN_EVENT_DEVICE_RESET, + + /* Vhost-user protocol violation by frontend */ + CONN_EVENT_FRONTEND_EINVAL, + + /* Vhost-user protocol violation by backend */ + CONN_EVENT_BACKEND_EINVAL, + + CONN_EVENT_MAX +} ConnectionEvent; + +static void conn_state_transition(VirtIOVhostUser *s, ConnectionEvent evt); + +static void virtio_vhost_user_reset_async_state(VirtIOVhostUser *s) +{ + s->read_bytes_needed = 0; + s->read_ptr = NULL; + s->read_done = NULL; + s->read_waiting_on_rxq = false; + s->read_msg_size = 0; + + s->write_bytes_avail = 0; + s->write_ptr = NULL; + s->write_done = NULL; + if (s->write_watch_tag) { + g_source_remove(s->write_watch_tag); + } + s->write_watch_tag = 0; +} + +static void virtio_vhost_user_chr_event(void *opaque, QEMUChrEvent event) +{ + VirtIOVhostUser *s = opaque; + + trace_virtio_vhost_user_chr_event(s, event); + + switch (event) { + case CHR_EVENT_OPENED: + conn_state_transition(s, CONN_EVENT_CHR_OPENED); + break; + case CHR_EVENT_CLOSED: + conn_state_transition(s, CONN_EVENT_CHR_CLOSED); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static int virtio_vhost_user_chr_change(void *opaque) +{ + VirtIOVhostUser *s = opaque; + + trace_virtio_vhost_user_chr_change(s); + + if (s->config.status & VIRTIO_VHOST_USER_STATUS_FRONTEND_UP) { + conn_state_transition(s, CONN_EVENT_CHR_CHANGE); + } + return 0; +} + +static int virtio_vhost_user_chr_can_read(void *opaque) +{ + VirtIOVhostUser *s = opaque; + + return s->read_bytes_needed; +} + +static void virtio_vhost_user_chr_read(void *opaque, + const uint8_t *buf, int size) +{ + VirtIOVhostUser *s = opaque; + + assert(size <= s->read_bytes_needed); + + memcpy(s->read_ptr, buf, size); + s->read_ptr += size; + s->read_bytes_needed -= size; + + if (s->read_bytes_needed == 0) { + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, + virtio_vhost_user_chr_event, + virtio_vhost_user_chr_change, + s, NULL, false); + + s->read_done(s); + } +} + +/* Start reading from vhost-user socket */ +static void virtio_vhost_user_aio_read(VirtIOVhostUser *s, + void *buf, size_t len, + void (*done)(VirtIOVhostUser *s)) +{ + assert(s->read_bytes_needed == 0); + + s->read_ptr = buf; + s->read_bytes_needed = len; + s->read_done = done; + + qemu_chr_fe_set_handlers(&s->chr, + virtio_vhost_user_chr_can_read, + virtio_vhost_user_chr_read, + virtio_vhost_user_chr_event, + virtio_vhost_user_chr_change, + s, NULL, false); +} + +/* Called once with chan=NULL, cond=0 to begin and then called by event loop */ +static gboolean virtio_vhost_user_chr_write(void *do_not_use, GIOCondition cond, + void *opaque) +{ + VirtIOVhostUser *s = opaque; + int nwritten; + guint tag = s->write_watch_tag; + + nwritten = 
qemu_chr_fe_write(&s->chr, s->write_ptr, s->write_bytes_avail); + if (nwritten < 0) { + if (errno == EAGAIN) { + nwritten = 0; + } else { + conn_state_transition(s, CONN_EVENT_SOCKET_ERROR); + return G_SOURCE_REMOVE; + } + } + + s->write_bytes_avail -= nwritten; + if (s->write_bytes_avail == 0) { + s->write_done(s); + return G_SOURCE_REMOVE; + } + + if (tag == 0) { + tag = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, + virtio_vhost_user_chr_write, s); + if (!tag) { + conn_state_transition(s, CONN_EVENT_SOCKET_ERROR); + return G_SOURCE_REMOVE; + } + + s->write_watch_tag = tag; + } + + return G_SOURCE_CONTINUE; +} + +/* Start writing to vhost-user socket */ +static void virtio_vhost_user_aio_write(VirtIOVhostUser *s, + void *buf, size_t len, + void (*done)(VirtIOVhostUser *s)) +{ + assert(s->write_bytes_avail == 0); + + s->write_ptr = buf; + s->write_bytes_avail = len; + s->write_done = done; + + virtio_vhost_user_chr_write(NULL, 0, s); +} + +static void virtio_vhost_user_cleanup_kickfds(VirtIOVhostUser *s) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(s->kickfds); i++) { + if (event_notifier_get_fd(&s->kickfds[i].guest_notifier) >= 0) { + /* Remove the kickfd from the main event loop */ + event_notifier_set_handler(&s->kickfds[i].guest_notifier, NULL); + close(s->kickfds[i].guest_notifier.rfd); + event_notifier_init_fd(&s->kickfds[i].guest_notifier, -1); + s->kickfds[i].msi_vector = VIRTIO_NO_VECTOR; + } + } +} + +static void virtio_vhost_user_cleanup_callfds(VirtIOVhostUser *s) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(s->callfds); i++) { + if (event_notifier_get_fd(&s->callfds[i]) >= 0) { + virtio_vhost_user_unregister_doorbell(s, &s->callfds[i], i); + close(s->callfds[i].rfd); + event_notifier_init_fd(&s->callfds[i], -1); + } + } +} + +static void virtio_vhost_user_cleanup_mem_table(VirtIOVhostUser *s) +{ + int i; + + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) { + VirtIOVhostUserMemTableRegion *region = &s->mem_table[i]; + + if (!region->mmap_addr) { + continue; + } + + munmap(region->mmap_addr, region->total_size); + region->mmap_addr = NULL; + + virtio_vhost_user_delete_vhost_mem_region(s, ®ion->mr); + } +} + +static void conn_action_set_backend_up(VirtIOVhostUser *s) +{ + /* Guest-initiated, no need for virtio_notify_config() */ + s->config.status = VIRTIO_VHOST_USER_STATUS_BACKEND_UP; +} + +static void conn_action_set_backend_down(VirtIOVhostUser *s) +{ + /* Guest-initiated, no need for virtio_notify_config() */ + s->config.status = 0; +} + +static void virtio_vhost_user_hdr_done(VirtIOVhostUser *s); + +static void conn_action_connect(VirtIOVhostUser *s) +{ + s->config.status = VIRTIO_VHOST_USER_STATUS_BACKEND_UP | + VIRTIO_VHOST_USER_STATUS_FRONTEND_UP; + virtio_notify_config(VIRTIO_DEVICE(s)); + + /* Begin servicing vhost-user messages */ + virtio_vhost_user_aio_read(s, &s->read_msg, VHOST_USER_HDR_SIZE, + virtio_vhost_user_hdr_done); +} + +static void conn_action_disconnect_no_notify(VirtIOVhostUser *s) +{ + qemu_chr_fe_set_handlers(&s->chr, + NULL, + NULL, + virtio_vhost_user_chr_event, + virtio_vhost_user_chr_change, + s, NULL, false); + qemu_chr_fe_set_open(&s->chr, 0); + + virtio_vhost_user_reset_async_state(s); + + /* TODO drain txq? */ + + /* + * It is only safe to clean up resources where future accesses have no + * guest-visible effects. Vcpus may still access resources if they haven't + * noticed the disconnect event yet. Callfds are safe since writes to + * invalid indices are ignored. 
Memory table regions cannot be unmapped + * since vring polling may still be running. + */ + virtio_vhost_user_cleanup_kickfds(s); + virtio_vhost_user_cleanup_callfds(s); + + s->config.status = 0; +} + +static void conn_action_disconnect(VirtIOVhostUser *s) +{ + conn_action_disconnect_no_notify(s); + virtio_notify_config(VIRTIO_DEVICE(s)); +} + +static const struct { + void (*action)(VirtIOVhostUser *s); + ConnectionState new_state; +} conn_state_machine[CONN_STATE_MAX][CONN_EVENT_MAX] = { + [CONN_STATE_INITIAL] = { + [CONN_EVENT_BACKEND_UP] = {conn_action_set_backend_up, + CONN_STATE_BACKEND_UP}, + [CONN_EVENT_CHR_OPENED] = {NULL, CONN_STATE_CHR_OPENED}, + [CONN_EVENT_CHR_CLOSED] = {NULL, CONN_STATE_INITIAL}, + [CONN_EVENT_CHR_CHANGE] = {NULL, CONN_STATE_INITIAL}, + [CONN_EVENT_DEVICE_RESET] = {NULL, CONN_STATE_INITIAL}, + }, + [CONN_STATE_BACKEND_UP] = { + [CONN_EVENT_BACKEND_DOWN] = {conn_action_set_backend_down, + CONN_STATE_INITIAL}, + [CONN_EVENT_CHR_OPENED] = {conn_action_connect, CONN_STATE_CONNECTED}, + [CONN_EVENT_CHR_CLOSED] = {NULL, CONN_STATE_BACKEND_UP}, + [CONN_EVENT_CHR_CHANGE] = {NULL, CONN_STATE_BACKEND_UP}, + [CONN_EVENT_DEVICE_RESET] = {conn_action_set_backend_down, + CONN_STATE_INITIAL}, + }, + [CONN_STATE_CHR_OPENED] = { + [CONN_EVENT_BACKEND_UP] = {conn_action_connect, CONN_STATE_CONNECTED}, + [CONN_EVENT_CHR_OPENED] = {NULL, CONN_STATE_CHR_OPENED}, + [CONN_EVENT_CHR_CLOSED] = {NULL, CONN_STATE_INITIAL}, + [CONN_EVENT_CHR_CHANGE] = {NULL, CONN_STATE_CHR_OPENED}, + [CONN_EVENT_DEVICE_RESET] = {NULL, CONN_STATE_INITIAL}, + }, + [CONN_STATE_CONNECTED] = { + [CONN_EVENT_BACKEND_DOWN] = {conn_action_disconnect_no_notify, + CONN_STATE_INITIAL}, + [CONN_EVENT_CHR_OPENED] = {NULL, CONN_STATE_CONNECTED}, + [CONN_EVENT_CHR_CLOSED] = {conn_action_disconnect, + CONN_STATE_INITIAL}, + [CONN_EVENT_CHR_CHANGE] = {conn_action_disconnect, CONN_STATE_INITIAL}, + [CONN_EVENT_SOCKET_ERROR] = {conn_action_disconnect, + CONN_STATE_INITIAL}, + [CONN_EVENT_DEVICE_RESET] = {conn_action_disconnect_no_notify, + CONN_STATE_INITIAL}, + [CONN_EVENT_FRONTEND_EINVAL] = {conn_action_disconnect, + CONN_STATE_INITIAL}, + [CONN_EVENT_BACKEND_EINVAL] = {conn_action_disconnect, + CONN_STATE_INITIAL}, + }, +}; + +static void conn_state_transition(VirtIOVhostUser *s, ConnectionEvent evt) +{ + ConnectionState old_state = s->conn_state; + ConnectionState new_state = conn_state_machine[old_state][evt].new_state; + + trace_virtio_vhost_user_conn_state_transition(s, old_state, evt, + new_state); + assert(new_state != CONN_STATE_UNDEFINED); + + s->conn_state = new_state; + + if (conn_state_machine[old_state][evt].action) { + conn_state_machine[old_state][evt].action(s); + } +} + +/* + * Frontend-to-backend message processing + * + * Messages are read from the vhost-user socket into s->read_msg. They are + * then parsed and may be modified. Finally they are put onto the rxq for the + * driver to read. + * + * Functions with "m2s" in their name handle the frontend-to-backend code path. 
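+ * For example, VHOST_USER_SET_MEM_TABLE is intercepted by m2s_set_mem_table()
+ * so the region fds can be mmapped and exposed through the shared memory BAR
+ * before the message is placed on the rxq.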
+ */ + +/* Put s->read_msg onto the rxq */ +static void virtio_vhost_user_deliver_m2s(VirtIOVhostUser *s) +{ + VirtQueueElement *elem; + size_t copied; + + elem = virtqueue_pop(s->rxq, sizeof(*elem)); + if (!elem) { + /* Leave message in s->read_msg and wait for rxq */ + trace_virtio_vhost_user_rxq_empty(s); + s->read_waiting_on_rxq = true; + return; + } + + s->read_waiting_on_rxq = false; + + copied = iov_from_buf(elem->in_sg, elem->in_num, 0, &s->read_msg, + s->read_msg_size); + if (copied != s->read_msg_size) { + g_free(elem); + virtio_error(VIRTIO_DEVICE(s), + "rxq buffer too small, got %zu, needed %zu", + copied, s->read_msg_size); + return; + } + + virtqueue_push(s->rxq, elem, copied); + g_free(elem); + + virtio_notify(VIRTIO_DEVICE(s), s->rxq); + + /* Next message, please */ + virtio_vhost_user_aio_read(s, &s->read_msg, VHOST_USER_HDR_SIZE, + virtio_vhost_user_hdr_done); +} + +static void m2s_get_vring_base(VirtIOVhostUser *s) +{ + unsigned int vq_idx; + + vq_idx = s->read_msg.payload.state.index; + + if (event_notifier_get_fd(&s->kickfds[vq_idx].guest_notifier) >= 0) { + /* Remove the kickfd from the main event loop */ + event_notifier_set_handler(&s->kickfds[vq_idx].guest_notifier, NULL); + close(s->kickfds[vq_idx].guest_notifier.rfd); + event_notifier_init_fd(&s->kickfds[vq_idx].guest_notifier, -1); + } + + if (event_notifier_get_fd(&s->callfds[vq_idx]) >= 0) { + virtio_vhost_user_unregister_doorbell(s, &s->callfds[vq_idx], vq_idx); + close(s->callfds[vq_idx].rfd); + event_notifier_init_fd(&s->callfds[vq_idx], -1); + } +} + +static void m2s_set_vring_kick(VirtIOVhostUser *s) +{ + uint8_t vq_idx; + int fd; + + vq_idx = s->read_msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + + if (s->read_msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK) { + fd = -1; + } else { + fd = qemu_chr_fe_get_msgfd(&s->chr); + + /* Must not block when reach max eventfd counter value */ + qemu_socket_set_nonblock(fd); + } + + if (event_notifier_get_fd(&s->kickfds[vq_idx].guest_notifier) >= 0) { + /* Remove the kickfd from the main event loop */ + event_notifier_set_handler(&s->kickfds[vq_idx].guest_notifier, NULL); + close(s->kickfds[vq_idx].guest_notifier.rfd); + event_notifier_init_fd(&s->kickfds[vq_idx].guest_notifier, -1); + } + + /* Initialize the EventNotifier with the received kickfd */ + event_notifier_init_fd(&s->kickfds[vq_idx].guest_notifier, fd); + + /* Insert the kickfd in the main event loop */ + if (fd != -1) { + event_notifier_set_handler(&s->kickfds[vq_idx].guest_notifier, + virtio_vhost_user_guest_notifier_read); + } +} + +static void m2s_set_vring_call(VirtIOVhostUser *s) +{ + uint8_t vq_idx; + int fd; + + vq_idx = s->read_msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + + /* We should always have a large enough array */ + QEMU_BUILD_BUG_ON(0xff >= ARRAY_SIZE(s->callfds)); + + if (s->read_msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK) { + fd = -1; + } else { + fd = qemu_chr_fe_get_msgfd(&s->chr); + + /* Must not block when reach max eventfd counter value */ + qemu_socket_set_nonblock(fd); + } + + if (event_notifier_get_fd(&s->callfds[vq_idx]) >= 0) { + virtio_vhost_user_unregister_doorbell(s, &s->callfds[vq_idx], vq_idx); + close(s->callfds[vq_idx].rfd); + event_notifier_init_fd(&s->callfds[vq_idx], -1); + } + + /* Initialize the EventNotifier with the received callfd */ + event_notifier_init_fd(&s->callfds[vq_idx], fd); + + /* Register the EventNotifier as an ioeventfd. 
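+ * Guest doorbell writes for this queue will then signal the callfd
+ * directly, without trapping into virtio_vhost_user_doorbells_write().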
*/ + if (fd != -1) { + virtio_vhost_user_register_doorbell(s, &s->callfds[vq_idx], vq_idx); + } +} + +static void m2s_set_mem_table(VirtIOVhostUser *s) +{ + VhostUserMemory m = s->read_msg.payload.memory, *memory = &m; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int num_fds; + uint32_t i; + + if (memory->nregions > VHOST_MEMORY_MAX_NREGIONS) { + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + num_fds = qemu_chr_fe_get_msgfds(&s->chr, fds, ARRAY_SIZE(fds)); + if (num_fds != memory->nregions) { + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + virtio_vhost_user_cleanup_mem_table(s); + + for (i = 0; i < memory->nregions; i++) { + VhostUserMemoryRegion *input = &memory->regions[i]; + VirtIOVhostUserMemTableRegion *region = &s->mem_table[i]; + void *mmap_addr; + + region->total_size = input->mmap_offset + input->memory_size; + if (region->total_size < input->mmap_offset || + region->total_size < input->memory_size) { + goto err; + } + + mmap_addr = mmap(0, region->total_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fds[i], 0); + close(fds[i]); + fds[i] = -1; + if (mmap_addr == MAP_FAILED) { + goto err; + } + region->mmap_addr = mmap_addr; + + trace_virtio_vhost_user_memory_region(s, + memory->regions[i].guest_phys_addr, + memory->regions[i].memory_size, + memory->regions[i].userspace_addr, + memory->regions[i].mmap_offset, + region->mmap_addr); + } + + /* + * Export the mmapped vhost memory regions to the guest through PCI + * transport + */ + virtio_vhost_user_set_vhost_mem_regions(s); + + return; + +err: + for (i = 0; i < memory->nregions; i++) { + if (fds[i] >= 0) { + close(fds[i]); + } + } + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); +} + +static void m2s_set_protocol_features(VirtIOVhostUser *s) +{ + /* Only allow features we support too */ + s->read_msg.payload.u64 &= SUPPORTED_VHOST_USER_FEATURES; +} + +/* Parse s->read_msg from frontend */ +static void virtio_vhost_user_parse_m2s(VirtIOVhostUser *s) +{ + uint32_t version = s->read_msg.hdr.flags & VHOST_USER_VERSION_MASK; + + if (version != VHOST_USER_VERSION) { + trace_virtio_vhost_user_m2s_bad_version(s, version); + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + if (s->read_msg.hdr.flags & VHOST_USER_REPLY_MASK) { + trace_virtio_vhost_user_m2s_unexpected_reply(s); + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + if (s->read_msg.hdr.request >= VHOST_USER_MAX) { + trace_virtio_vhost_user_m2s_bad_request(s, s->read_msg.hdr.request); + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + trace_virtio_vhost_user_m2s_request(s, s->read_msg.hdr.request); + + /* Most messages are passed through but a few need to be handled */ + switch (s->read_msg.hdr.request) { + case VHOST_USER_GET_FEATURES: + break; + case VHOST_USER_SET_FEATURES: + break; + case VHOST_USER_SET_OWNER: + break; + case VHOST_USER_RESET_OWNER: + break; + case VHOST_USER_SET_MEM_TABLE: + m2s_set_mem_table(s); + break; + case VHOST_USER_SET_VRING_NUM: + break; + case VHOST_USER_SET_VRING_ADDR: + break; + case VHOST_USER_SET_VRING_BASE: + break; + case VHOST_USER_GET_VRING_BASE: + m2s_get_vring_base(s); + break; + case VHOST_USER_SET_VRING_KICK: + m2s_set_vring_kick(s); + break; + case VHOST_USER_SET_VRING_CALL: + m2s_set_vring_call(s); + break; + case VHOST_USER_GET_PROTOCOL_FEATURES: + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + m2s_set_protocol_features(s); + break; + case VHOST_USER_GET_QUEUE_NUM: + break; + case VHOST_USER_SET_VRING_ENABLE: + break; 
+ default: + trace_virtio_vhost_user_m2s_unknown_request(s, s->read_msg.hdr.request); + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + /* Bail if a handler function reset the connection */ + if (s->conn_state != CONN_STATE_CONNECTED) { + return; + } + + /* Stash size before we endian-convert s->read_msg */ + s->read_msg_size = VHOST_USER_HDR_SIZE + s->read_msg.hdr.size; + + /* TODO convert read_msg to little-endian for cross-endian support */ + + virtio_vhost_user_deliver_m2s(s); +} + +static void virtio_vhost_user_hdr_done(VirtIOVhostUser *s) +{ + if (s->read_msg.hdr.size > VHOST_USER_PAYLOAD_SIZE) { + trace_virtio_vhost_user_m2s_bad_payload_size(s, s->read_msg.hdr.size); + conn_state_transition(s, CONN_EVENT_FRONTEND_EINVAL); + return; + } + + /* Clear out unused payload bytes */ + memset(&s->read_msg.payload, 0, VHOST_USER_PAYLOAD_SIZE); + + if (s->read_msg.hdr.size > 0) { + virtio_vhost_user_aio_read(s, &s->read_msg.payload, + s->read_msg.hdr.size, + virtio_vhost_user_parse_m2s); + } else { + virtio_vhost_user_parse_m2s(s); + } +} + +static void virtio_vhost_user_rxq(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOVhostUser *s = VIRTIO_VHOST_USER(vdev); + + if (s->read_waiting_on_rxq) { + virtio_vhost_user_deliver_m2s(s); + } +} + +/* + * Backend-to-frontend message processing + * + * Messages are read from the txq into s->write_msg. They are then parsed and + * may be modified. Finally they are written to the vhost-user socket. + * + * Functions with "s2m" in their name handle the backend-to-frontend code path. + */ + +static void s2m_get_protocol_features(VirtIOVhostUser *s) +{ + /* Only allow features we support too */ + s->write_msg.payload.u64 &= SUPPORTED_VHOST_USER_FEATURES; +} + +static void virtio_vhost_user_tx_done(VirtIOVhostUser *s); + +/* Parse s->write_msg from backend */ +static void virtio_vhost_user_parse_s2m(VirtIOVhostUser *s) +{ + uint32_t version = s->write_msg.hdr.flags & VHOST_USER_VERSION_MASK; + + if (version != VHOST_USER_VERSION) { + trace_virtio_vhost_user_s2m_bad_version(s, version); + conn_state_transition(s, CONN_EVENT_BACKEND_EINVAL); + return; + } + + if (!(s->write_msg.hdr.flags & VHOST_USER_REPLY_MASK)) { + trace_virtio_vhost_user_s2m_expected_reply(s); + conn_state_transition(s, CONN_EVENT_BACKEND_EINVAL); + return; + } + + if (s->write_msg.hdr.request >= VHOST_USER_MAX) { + trace_virtio_vhost_user_s2m_bad_request(s, s->write_msg.hdr.request); + conn_state_transition(s, CONN_EVENT_BACKEND_EINVAL); + return; + } + + trace_virtio_vhost_user_s2m_request(s, s->write_msg.hdr.request); + + /* Very few messages need to be touched */ + switch (s->write_msg.hdr.request) { + case VHOST_USER_GET_FEATURES: + break; + case VHOST_USER_SET_FEATURES: + break; + case VHOST_USER_SET_OWNER: + break; + case VHOST_USER_RESET_OWNER: + break; + case VHOST_USER_SET_MEM_TABLE: + break; + case VHOST_USER_SET_VRING_NUM: + break; + case VHOST_USER_SET_VRING_ADDR: + break; + case VHOST_USER_SET_VRING_BASE: + break; + case VHOST_USER_GET_VRING_BASE: + break; + case VHOST_USER_SET_VRING_KICK: + break; + case VHOST_USER_SET_VRING_CALL: + break; + case VHOST_USER_GET_PROTOCOL_FEATURES: + s2m_get_protocol_features(s); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + break; + case VHOST_USER_GET_QUEUE_NUM: + break; + case VHOST_USER_SET_VRING_ENABLE: + break; + default: + trace_virtio_vhost_user_s2m_unknown_request(s, + s->write_msg.hdr.request); + conn_state_transition(s, CONN_EVENT_BACKEND_EINVAL); + return; + } + + /* Bail if a handler function reset the 
connection */ + if (s->conn_state != CONN_STATE_CONNECTED) { + return; + } + + virtio_vhost_user_aio_write(s, &s->write_msg, + VHOST_USER_HDR_SIZE + s->write_msg.hdr.size, + virtio_vhost_user_tx_done); +} + +static void virtio_vhost_user_txq(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOVhostUser *s = VIRTIO_VHOST_USER(vdev); + VirtQueueElement *elem; + size_t msgsize; + size_t copied; + + /* If the last message is still being transferred we'll come back later */ + if (s->write_bytes_avail != 0) { + return; + } + + elem = virtqueue_pop(s->txq, sizeof(*elem)); + if (!elem) { + return; /* no elements left on virtqueue */ + } + + msgsize = iov_size(elem->out_sg, elem->out_num); + if (msgsize < VHOST_USER_HDR_SIZE || msgsize > sizeof(s->write_msg)) { + g_free(elem); + virtio_error(VIRTIO_DEVICE(s), + "invalid txq buffer size, got %zu", msgsize); + return; + } + + /* Clear out unused payload bytes */ + memset(&s->write_msg.payload, 0, VHOST_USER_PAYLOAD_SIZE); + + copied = iov_to_buf(elem->out_sg, elem->out_num, 0, + &s->write_msg, msgsize); + if (copied != VHOST_USER_HDR_SIZE + s->write_msg.hdr.size || + copied != msgsize) { + g_free(elem); + virtio_error(VIRTIO_DEVICE(s), + "invalid txq buffer size, got %zu", msgsize); + return; + } + + virtqueue_push(s->txq, elem, copied); + g_free(elem); + + virtio_notify(VIRTIO_DEVICE(s), s->txq); + + /* TODO convert from little-endian */ + + virtio_vhost_user_parse_s2m(s); +} + +static void virtio_vhost_user_tx_done(VirtIOVhostUser *s) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(s); + VirtQueue *vq = s->txq; + + trace_virtio_vhost_user_tx_done(s); + + /* Try to process more messages from the driver */ + virtio_vhost_user_txq(vdev, vq); +} + +static uint64_t +virtio_vhost_user_get_features(VirtIODevice *vdev, + uint64_t requested_features, + Error **errp) +{ + return requested_features; +} + +static void virtio_vhost_user_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOVhostUserConfig *vvuconfig = (VirtIOVhostUserConfig *)config; + VirtIOVhostUser *s = VIRTIO_VHOST_USER(vdev); + + virtio_stl_p(vdev, &vvuconfig->status, s->config.status); + virtio_stl_p(vdev, &vvuconfig->max_vhost_queues, + s->config.max_vhost_queues); + memcpy(vvuconfig->uuid, s->config.uuid, sizeof(vvuconfig->uuid)); +} + +static void virtio_vhost_user_set_config(VirtIODevice *vdev, + const uint8_t *config) +{ + VirtIOVhostUserConfig *vvuconfig = (VirtIOVhostUserConfig *)config; + VirtIOVhostUser *s = VIRTIO_VHOST_USER(vdev); + uint32_t status; + bool old_backend_up; + bool new_backend_up; + + status = virtio_ldl_p(vdev, &vvuconfig->status); + trace_virtio_vhost_user_set_config(s, s->config.status, status); + if (status & ~(VIRTIO_VHOST_USER_STATUS_BACKEND_UP | + VIRTIO_VHOST_USER_STATUS_FRONTEND_UP)) { + virtio_error(vdev, "undefined virtio-vhost-user status bit set " + "(0x%x)", status); + return; + } + + old_backend_up = s->config.status & VIRTIO_VHOST_USER_STATUS_BACKEND_UP; + new_backend_up = status & VIRTIO_VHOST_USER_STATUS_BACKEND_UP; + + if (!old_backend_up && new_backend_up) { + conn_state_transition(s, CONN_EVENT_BACKEND_UP); + } else if (old_backend_up && !new_backend_up) { + conn_state_transition(s, CONN_EVENT_BACKEND_DOWN); + } +} + +static void virtio_vhost_user_reset(VirtIODevice *vdev) +{ + VirtIOVhostUser *s = VIRTIO_VHOST_USER(vdev); + + conn_state_transition(s, CONN_EVENT_DEVICE_RESET); + + virtio_vhost_user_reset_async_state(s); +} + +static void virtio_vhost_user_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + 
VirtIOVhostUser *s = VIRTIO_VHOST_USER(dev); + size_t i; + + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Missing chardev"); + return; + } + + for (i = 0; i < ARRAY_SIZE(s->kickfds); i++) { + s->kickfds[i].vdev = vdev; + event_notifier_init_fd(&s->kickfds[i].guest_notifier, -1); + s->kickfds[i].msi_vector = VIRTIO_NO_VECTOR; + } + + for (i = 0; i < ARRAY_SIZE(s->callfds); i++) { + event_notifier_init_fd(&s->callfds[i], -1); + } + + virtio_init(vdev, VIRTIO_ID_VHOST_USER, + sizeof(VirtIOVhostUserConfig)); + + s->rxq = virtio_add_queue(vdev, VIRTIO_VHOST_USER_VIRTQUEUE_SIZE, + virtio_vhost_user_rxq); + s->txq = virtio_add_queue(vdev, VIRTIO_VHOST_USER_VIRTQUEUE_SIZE, + + virtio_vhost_user_txq); + /* Each vhost-user queue uses doorbells and a notification resources */ + s->config.max_vhost_queues = 1024; + + /* Generate a uuid */ + QemuUUID uuid; + qemu_uuid_generate(&uuid); + memcpy(s->config.uuid, uuid.data, sizeof(uuid.data)); + + virtio_vhost_user_reset_async_state(s); + + s->conn_state = CONN_STATE_INITIAL; + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, virtio_vhost_user_chr_event, + virtio_vhost_user_chr_change, s, NULL, false); +} + +static void virtio_vhost_user_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOVhostUser *s = VIRTIO_VHOST_USER(vdev); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, + NULL, NULL, NULL, false); + virtio_cleanup(vdev); + virtio_vhost_user_cleanup_mem_table(s); + virtio_vhost_user_cleanup_kickfds(s); + virtio_vhost_user_cleanup_callfds(s); + virtio_vhost_user_cleanup_additional_resources(s); +} + +static const VMStateDescription vmstate_virtio_vhost_user_device = { + .name = "virtio-vhost-user-device", + .version_id = VIRTIO_VHOST_USER_VM_VERSION, + .minimum_version_id = VIRTIO_VHOST_USER_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_virtio_vhost_user = { + .name = "virtio-vhost-user", + .minimum_version_id = VIRTIO_VHOST_USER_VM_VERSION, + .version_id = VIRTIO_VHOST_USER_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_INT32(conn_state, VirtIOVhostUser), + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property virtio_vhost_user_properties[] = { + DEFINE_PROP_CHR("chardev", VirtIOVhostUser, chr), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_vhost_user_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, virtio_vhost_user_properties); + dc->vmsd = &vmstate_virtio_vhost_user; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_vhost_user_device_realize; + vdc->unrealize = virtio_vhost_user_device_unrealize; + vdc->get_config = virtio_vhost_user_get_config; + vdc->set_config = virtio_vhost_user_set_config; + vdc->get_features = virtio_vhost_user_get_features; + vdc->reset = virtio_vhost_user_reset; + vdc->vmsd = &vmstate_virtio_vhost_user_device; +} + +static const TypeInfo virtio_vhost_user_info = { + .name = TYPE_VIRTIO_VHOST_USER, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOVhostUser), + .class_init = virtio_vhost_user_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_vhost_user_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 5d607aeaa0..89870ddd0b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -171,7 +171,8 @@ 
const char *virtio_device_names[] = { [VIRTIO_ID_PARAM_SERV] = "virtio-param-serv", [VIRTIO_ID_AUDIO_POLICY] = "virtio-audio-pol", [VIRTIO_ID_BT] = "virtio-bluetooth", - [VIRTIO_ID_GPIO] = "virtio-gpio" + [VIRTIO_ID_GPIO] = "virtio-gpio", + [VIRTIO_ID_VHOST_USER] = "virtio-vhost-user" }; static const char *virtio_id_to_name(uint16_t device_id) @@ -1937,7 +1938,7 @@ void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, } /* virtio device */ -static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) +void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) { BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); @@ -2454,7 +2455,7 @@ void virtio_del_queue(VirtIODevice *vdev, int n) virtio_delete_queue(&vdev->vq[n]); } -static void virtio_set_isr(VirtIODevice *vdev, int value) +void virtio_set_isr(VirtIODevice *vdev, int value) { uint8_t old = qatomic_read(&vdev->isr); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 44dacfa224..9cbeced174 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -86,6 +86,7 @@ extern bool pci_available; #define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013 #define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014 #define PCI_DEVICE_ID_VIRTIO_MEM 0x1015 +#define PCI_DEVICE_ID_VIRTIO_VHOST_USER 0x1016 #define PCI_VENDOR_ID_REDHAT 0x1b36 #define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 2446dcd9ae..66147a7446 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -18,6 +18,7 @@ #include "hw/pci/msi.h" #include "hw/virtio/virtio-bus.h" #include "qom/object.h" +#include "standard-headers/linux/virtio_pci.h" /* virtio-pci-bus */ @@ -252,4 +253,10 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); */ unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); +int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy); +void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, + VirtIOPCIRegion *region, + struct virtio_pci_cap *cap, + MemoryRegion *mr, + uint8_t bar); #endif diff --git a/include/hw/virtio/virtio-vhost-user.h b/include/hw/virtio/virtio-vhost-user.h new file mode 100644 index 0000000000..fda64a1c71 --- /dev/null +++ b/include/hw/virtio/virtio-vhost-user.h @@ -0,0 +1,126 @@ +/* + * Virtio Vhost-user Device + * + * Copyright (C) 2017 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi <stefa...@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_VIRTIO_VHOST_USER_H +#define QEMU_VIRTIO_VHOST_USER_H + +#include "hw/virtio/virtio.h" +#include "standard-headers/linux/virtio_pci.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" + +#define TYPE_VIRTIO_VHOST_USER "virtio-vhost-user-device" +#define VIRTIO_VHOST_USER(obj) \ + OBJECT_CHECK(VirtIOVhostUser, (obj), TYPE_VIRTIO_VHOST_USER) + +/* Macros for the offsets in virtio notification structure */ +#define NOTIFICATION_SELECT 0 +#define NOTIFICATION_MSIX_VECTOR 2 + +/* Macros for the additional resources configuration types */ +#define VIRTIO_PCI_CAP_DOORBELL_CFG 6 +#define VIRTIO_PCI_CAP_NOTIFICATION_CFG 7 +#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8 + +/* The virtio configuration space fields */ +typedef struct { + uint32_t status; +#define VIRTIO_VHOST_USER_STATUS_BACKEND_UP (1 << 0) +#define VIRTIO_VHOST_USER_STATUS_FRONTEND_UP (1 << 1) + uint32_t max_vhost_queues; + uint8_t uuid[16]; +} QEMU_PACKED VirtIOVhostUserConfig; + +/* Keep track of the mmap for each memory table region */ +typedef struct { + MemoryRegion mr; + void *mmap_addr; + size_t total_size; +} VirtIOVhostUserMemTableRegion; + +struct kickfd { + VirtIODevice *vdev; + EventNotifier guest_notifier; + uint16_t msi_vector; +}; + +/* Additional resources configuration structures */ + +/* Doorbell structure layout */ +struct virtio_pci_doorbell_cap { + struct virtio_pci_cap cap; + uint32_t doorbell_off_multiplier; +}; + +/* Notification structure layout */ +struct virtio_pci_notification_cfg { + uint16_t notification_select; /* read-write */ + uint16_t notification_msix_vector; /* read-write */ +}; + +typedef struct VirtIOVhostUser VirtIOVhostUser; +struct VirtIOVhostUser { + VirtIODevice parent_obj; + + /* The vhost-user socket */ + CharBackend chr; + + /* notification select */ + uint16_t nselect; + /* Eventfds from VHOST_USER_SET_VRING_KICK along with the MSI-X vectors. */ + struct kickfd kickfds[VIRTIO_QUEUE_MAX]; + + /* Eventfds from VHOST_USER_SET_VRING_CALL */ + EventNotifier callfds[VIRTIO_QUEUE_MAX]; + + /* Mapped memory regions from VHOST_USER_SET_MEM_TABLE */ + VirtIOVhostUserMemTableRegion mem_table[VHOST_MEMORY_MAX_NREGIONS]; + + VirtIOVhostUserConfig config; + + /* Connection establishment state */ + int conn_state; + + /* Device-to-driver message queue */ + VirtQueue *rxq; + + /* Driver-to-device message queue */ + VirtQueue *txq; + + /* Asynchronous read state */ + int read_bytes_needed; + void *read_ptr; + void (*read_done)(VirtIOVhostUser *s); + VhostUserMsg read_msg; + bool read_waiting_on_rxq; /* need rx buffer? 
*/ + size_t read_msg_size; + + /* Asynchronous write state */ + int write_bytes_avail; + void *write_ptr; + void (*write_done)(VirtIOVhostUser *s); + VhostUserMsg write_msg; + guint write_watch_tag; +}; + +void virtio_vhost_user_set_vhost_mem_regions(VirtIOVhostUser *s); +void virtio_vhost_user_delete_vhost_mem_region(VirtIOVhostUser *s, + MemoryRegion *mr); +void virtio_vhost_user_cleanup_additional_resources(VirtIOVhostUser *s); +void virtio_vhost_user_register_doorbell(VirtIOVhostUser *s, EventNotifier *e, + uint8_t vq_idx); +void virtio_vhost_user_unregister_doorbell(VirtIOVhostUser *s, EventNotifier *e, + uint8_t vq_idx); +void virtio_vhost_user_guest_notifier_read(EventNotifier *n); + +#endif /* QEMU_VIRTIO_VHOST_USER_H */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index db1c0ddf6b..68476186c2 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -324,6 +324,8 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); VirtQueue *virtio_vector_next_queue(VirtQueue *vq); +void virtio_set_isr(VirtIODevice *vdev, int value); +void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector); static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) { diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index 80d76b75bc..6249506ddd 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -68,6 +68,7 @@ #define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ #define VIRTIO_ID_GPIO 41 /* virtio gpio */ +#define VIRTIO_ID_VHOST_USER 43 /* virtio vhost-user */ /* * Virtio Transitional IDs -- 2.25.1