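Add a container I/O ops vector (VFIOContIO) that is used for communication with the VFIO driver; the default implementation continues to use ioctl() on the kernel VFIO driver. This is prep work for vfio-user, which will communicate over a socket.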
Signed-off-by: John G Johnson <john.g.john...@oracle.com> --- include/hw/vfio/vfio-common.h | 33 +++++++++++ hw/vfio/common.c | 126 ++++++++++++++++++++++++++++-------------- 2 files changed, 117 insertions(+), 42 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 8af11b0..2761a62 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace { } VFIOAddressSpace; struct VFIOGroup; +typedef struct VFIOContIO VFIOContIO; typedef struct VFIOContainer { VFIOAddressSpace *space; @@ -83,6 +84,7 @@ typedef struct VFIOContainer { MemoryListener prereg_listener; unsigned iommu_type; Error *error; + VFIOContIO *io_ops; bool initialized; bool dirty_pages_supported; uint64_t dirty_pgsizes; @@ -154,6 +156,37 @@ struct VFIODeviceOps { int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); }; +#ifdef CONFIG_LINUX + +/* + * The next 2 ops vectors are how Devices and Containers + * communicate with the server. The default option is + * through ioctl() to the kernel VFIO driver, but vfio-user + * can use a socket to a remote process. 
+ */ + +struct VFIOContIO { + int (*dma_map)(VFIOContainer *container, + struct vfio_iommu_type1_dma_map *map); + int (*dma_unmap)(VFIOContainer *container, + struct vfio_iommu_type1_dma_unmap *unmap, + struct vfio_bitmap *bitmap); + int (*dirty_bitmap)(VFIOContainer *container, + struct vfio_iommu_type1_dirty_bitmap *bitmap, + struct vfio_iommu_type1_dirty_bitmap_get *range); +}; + +#define CONT_DMA_MAP(cont, map) \ + ((cont)->io_ops->dma_map((cont), (map))) +#define CONT_DMA_UNMAP(cont, unmap, bitmap) \ + ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap))) +#define CONT_DIRTY_BITMAP(cont, bitmap, range) \ + ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range))) + +extern VFIOContIO vfio_cont_io_ioctl; + +#endif /* CONFIG_LINUX */ + typedef struct VFIOGroup { int fd; int groupid; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 080046e..dbf23c0 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -431,12 +431,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, goto unmap_exit; } - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); + ret = CONT_DMA_UNMAP(container, unmap, bitmap); if (!ret) { cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, iotlb->translated_addr, pages); } else { - error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); + error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret)); } g_free(bitmap->data); @@ -464,30 +464,7 @@ static int vfio_dma_unmap(VFIOContainer *container, return vfio_dma_unmap_bitmap(container, iova, size, iotlb); } - while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - /* - * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c - * v4.15) where an overflow in its wrap-around check prevents us from - * unmapping the last page of the address space. Test for the error - * condition and re-try the unmap excluding the last page. 
The - * expectation is that we've never mapped the last page anyway and this - * unmap request comes via vIOMMU support which also makes it unlikely - * that this page is used. This bug was introduced well after type1 v2 - * support was introduced, so we shouldn't need to test for v1. A fix - * is queued for kernel v5.0 so this workaround can be removed once - * affected kernels are sufficiently deprecated. - */ - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { - trace_vfio_dma_unmap_overflow_workaround(); - unmap.size -= 1ULL << ctz64(container->pgsizes); - continue; - } - error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); - return -errno; - } - - return 0; + return CONT_DMA_UNMAP(container, &unmap, NULL); } static int vfio_dma_map(VFIOContainer *container, hwaddr iova, @@ -500,24 +477,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, .iova = iova, .size = size, }; + int ret; if (!readonly) { map.flags |= VFIO_DMA_MAP_FLAG_WRITE; } - /* - * Try the mapping, if it fails with EBUSY, unmap the region and try - * again. This shouldn't be necessary, but we sometimes see it in - * the VGA ROM space. 
- */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || - (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } + ret = CONT_DMA_MAP(container, &map); - error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); - return -errno; + if (ret < 0) { + error_report("VFIO_MAP_DMA failed: %s", strerror(-ret)); + } + return ret; } static void vfio_host_win_add(VFIOContainer *container, @@ -1230,10 +1201,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; } - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); + ret = CONT_DIRTY_BITMAP(container, &dirty, NULL); if (ret) { error_report("Failed to set dirty tracking flag 0x%x errno: %d", - dirty.flags, errno); + dirty.flags, -ret); } } @@ -1283,11 +1254,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, goto err_out; } - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + ret = CONT_DIRTY_BITMAP(container, dbitmap, range); if (ret) { error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, - (uint64_t)range->size, errno); + (uint64_t)range->size, -ret); goto err_out; } @@ -2058,6 +2029,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->error = NULL; container->dirty_pages_supported = false; container->dma_max_mappings = 0; + container->io_ops = &vfio_cont_io_ioctl; QLIST_INIT(&container->giommu_list); QLIST_INIT(&container->hostwin_list); QLIST_INIT(&container->vrdl_list); @@ -2594,3 +2566,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op) } return vfio_eeh_container_op(container, op); } + +/* + * Traditional ioctl() based io_ops + */ + +static int vfio_io_dma_map(VFIOContainer *container, + struct vfio_iommu_type1_dma_map *map) +{ + + /* + * Try the mapping, if it fails with EBUSY, unmap the region and 
try + * again. This shouldn't be necessary, but we sometimes see it in + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 || + (errno == EBUSY && + vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) { + return 0; + } + return -errno; +} + +static int vfio_io_dma_unmap(VFIOContainer *container, + struct vfio_iommu_type1_dma_unmap *unmap, + struct vfio_bitmap *bitmap) +{ + + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c + * v4.15) where an overflow in its wrap-around check prevents us from + * unmapping the last page of the address space. Test for the error + * condition and re-try the unmap excluding the last page. The + * expectation is that we've never mapped the last page anyway and this + * unmap request comes via vIOMMU support which also makes it unlikely + * that this page is used. This bug was introduced well after type1 v2 + * support was introduced, so we shouldn't need to test for v1. A fix + * is queued for kernel v5.0 so this workaround can be removed once + * affected kernels are sufficiently deprecated. + */ + if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { + trace_vfio_dma_unmap_overflow_workaround(); + unmap->size -= 1ULL << ctz64(container->pgsizes); + continue; + } + error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); + return -errno; + } + + return 0; +} + +static int vfio_io_dirty_bitmap(VFIOContainer *container, + struct vfio_iommu_type1_dirty_bitmap *bitmap, + struct vfio_iommu_type1_dirty_bitmap_get *range) +{ + int ret; + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap); + + return ret < 0 ? -errno : ret; +} + +VFIOContIO vfio_cont_io_ioctl = { + .dma_map = vfio_io_dma_map, + .dma_unmap = vfio_io_dma_unmap, + .dirty_bitmap = vfio_io_dirty_bitmap, +}; -- 1.8.3.1