On Wed, Aug 13, 2025 at 7:49 AM Jason Wang <jasow...@redhat.com> wrote: > > This patch introduces map operations for virtio devices. Virtio used to > use the DMA API, which is not always appropriate since some devices > don't do DMA. Instead of using tricks and abusing the DMA API, let's > simply abstract the current mapping logic into virtio specific > mapping operations. A device or transport that doesn't do DMA > can implement its own mapping logic without the need to trick > the DMA core. In this case the map_token is opaque to the virtio core and > will be passed back to the transport or device specific map > operations. For other devices, the DMA API will still be used, so the map > token will still be the dma device to minimize the changeset and > performance impact. > > The mapping operations are abstracted as an independent structure > instead of reusing virtio_config_ops. This allows the transport to > simply reuse the structure for lower layers like vDPA. > > A set of new mapping helpers is introduced for devices that want > to do mapping by themselves. >
Acked-by: Eugenio Pérez <epere...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > drivers/virtio/virtio_ring.c | 217 +++++++++++++++++++++++++++------- > include/linux/virtio.h | 26 +++- > include/linux/virtio_config.h | 72 +++++++++++ > 3 files changed, 271 insertions(+), 44 deletions(-) > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index 94b2a8f3acc2..86188ffbce00 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -297,8 +297,14 @@ size_t virtio_max_dma_size(const struct virtio_device > *vdev) > { > size_t max_segment_size = SIZE_MAX; > > - if (vring_use_map_api(vdev)) > - max_segment_size = dma_max_mapping_size(vdev->dev.parent); > + if (vring_use_map_api(vdev)) { > + if (vdev->map) > + max_segment_size = > + vdev->map->max_mapping_size(vdev->dev.parent); > + else > + max_segment_size = > + dma_max_mapping_size(vdev->dev.parent); > + } > > return max_segment_size; > } > @@ -309,8 +315,8 @@ static void *vring_alloc_queue(struct virtio_device > *vdev, size_t size, > union vring_mapping_token *mapping_token) > { > if (vring_use_map_api(vdev)) { > - return dma_alloc_coherent(mapping_token->dma_dev, size, > - map_handle, flag); > + return virtqueue_map_alloc_coherent(vdev, mapping_token, size, > + map_handle, flag); > } else { > void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); > > @@ -343,7 +349,8 @@ static void vring_free_queue(struct virtio_device *vdev, > size_t size, > union vring_mapping_token *mapping_token) > { > if (vring_use_map_api(vdev)) > - dma_free_coherent(mapping_token->dma_dev, size, queue, > map_handle); > + virtqueue_map_free_coherent(vdev, mapping_token, size, > + queue, map_handle); > else > free_pages_exact(queue, PAGE_ALIGN(size)); > } > @@ -358,6 +365,25 @@ static struct device *vring_dma_dev(const struct > vring_virtqueue *vq) > return vq->mapping_token.dma_dev; > } > > +static void *vring_mapping_token(const struct vring_virtqueue *vq) > +{ > + 
return vq->mapping_token.token; > +} > + > +static int vring_mapping_error(const struct vring_virtqueue *vq, > + dma_addr_t addr) > +{ > + struct virtio_device *vdev = vq->vq.vdev; > + > + if (!vq->use_map_api) > + return 0; > + > + if (vdev->map) > + return vdev->map->mapping_error(vring_mapping_token(vq), > addr); > + else > + return dma_mapping_error(vring_dma_dev(vq), addr); > +} > + > /* Map one sg entry. */ > static int vring_map_one_sg(const struct vring_virtqueue *vq, struct > scatterlist *sg, > enum dma_data_direction direction, dma_addr_t > *addr, > @@ -387,11 +413,11 @@ static int vring_map_one_sg(const struct > vring_virtqueue *vq, struct scatterlist > * the way it expects (we don't guarantee that the scatterlist > * will exist for the lifetime of the mapping). > */ > - *addr = dma_map_page(vring_dma_dev(vq), > - sg_page(sg), sg->offset, sg->length, > - direction); > + *addr = virtqueue_map_page_attrs(&vq->vq, sg_page(sg), > + sg->offset, sg->length, > + direction, 0); > > - if (dma_mapping_error(vring_dma_dev(vq), *addr)) > + if (vring_mapping_error(vq, *addr)) > return -ENOMEM; > > return 0; > @@ -408,15 +434,6 @@ static dma_addr_t vring_map_single(const struct > vring_virtqueue *vq, > size, direction, 0); > } > > -static int vring_mapping_error(const struct vring_virtqueue *vq, > - dma_addr_t addr) > -{ > - if (!vq->use_map_api) > - return 0; > - > - return dma_mapping_error(vring_dma_dev(vq), addr); > -} > - > static void virtqueue_init(struct vring_virtqueue *vq, u32 num) > { > vq->vq.num_free = num; > @@ -453,11 +470,12 @@ static unsigned int vring_unmap_one_split(const struct > vring_virtqueue *vq, > } else if (!vring_need_unmap_buffer(vq, extra)) > goto out; > > - dma_unmap_page(vring_dma_dev(vq), > - extra->addr, > - extra->len, > - (flags & VRING_DESC_F_WRITE) ? > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > + virtqueue_unmap_page_attrs(&vq->vq, > + extra->addr, > + extra->len, > + (flags & VRING_DESC_F_WRITE) ? 
> + DMA_FROM_DEVICE : DMA_TO_DEVICE, > + 0); > > out: > return extra->next; > @@ -1271,10 +1289,11 @@ static void vring_unmap_extra_packed(const struct > vring_virtqueue *vq, > } else if (!vring_need_unmap_buffer(vq, extra)) > return; > > - dma_unmap_page(vring_dma_dev(vq), > - extra->addr, extra->len, > - (flags & VRING_DESC_F_WRITE) ? > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > + virtqueue_unmap_page_attrs(&vq->vq, > + extra->addr, extra->len, > + (flags & VRING_DESC_F_WRITE) ? > + DMA_FROM_DEVICE : DMA_TO_DEVICE, > + 0); > } > > static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, > @@ -2434,8 +2453,7 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_premapped); > struct device *virtqueue_dma_dev(struct virtqueue *_vq) > { > struct vring_virtqueue *vq = to_vvq(_vq); > - > - if (vq->use_map_api) > + if (vq->use_map_api && !_vq->vdev->map) > return vq->mapping_token.dma_dev; > else > return NULL; > @@ -3125,6 +3143,107 @@ const struct vring *virtqueue_get_vring(const struct > virtqueue *vq) > } > EXPORT_SYMBOL_GPL(virtqueue_get_vring); > > +/** > + * virtqueue_map_alloc_coherent - alloc coherent mapping > + * @vdev: the virtio device we are talking to > + * @mapping_token: device specific mapping token > + * @size: the size of the buffer > + * @map_handle: the pointer to the mapped address > + * @gfp: allocation flag (GFP_XXX) > + * > + * return virtual address or NULL on error > + */ > +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, > + union vring_mapping_token *mapping_token, > + size_t size, dma_addr_t *map_handle, > + gfp_t gfp) > +{ > + if (vdev->map) > + return vdev->map->alloc(mapping_token->token, size, > + map_handle, gfp); > + else > + return dma_alloc_coherent(mapping_token->dma_dev, size, > + map_handle, gfp); > +} > +EXPORT_SYMBOL_GPL(virtqueue_map_alloc_coherent); > + > +/** > + * virtqueue_map_free_coherent - free coherent mapping > + * @vdev: the virtio device we are talking to > + * @token: device specific mapping 
token > + * @size: the size of the buffer > + * @map_handle: the mapped address that needs to be freed > + * > + */ > +void virtqueue_map_free_coherent(struct virtio_device *vdev, > + union vring_mapping_token *mapping_token, > size_t size, void *vaddr, > + dma_addr_t map_handle) > +{ > + if (vdev->map) > + vdev->map->free(mapping_token->token, size, vaddr, > + map_handle, 0); > + else > + dma_free_coherent(mapping_token->dma_dev, size, vaddr, > map_handle); > +} > +EXPORT_SYMBOL_GPL(virtqueue_map_free_coherent); > + > +/** > + * virtqueue_map_page_attrs - map a page to the device > + * @_vq: the virtqueue we are talking to > + * @page: the page that will be mapped by the device > + * @offset: the offset in the page for a buffer > + * @size: the buffer size > + * @dir: mapping direction > + * @attrs: mapping attributes > + * > + * Returns mapped address. Caller should check that by > virtqueue_mapping_error(). > + */ > +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, > + struct page *page, > + unsigned long offset, > + size_t size, > + enum dma_data_direction dir, > + unsigned long attrs) > +{ > + const struct vring_virtqueue *vq = to_vvq(_vq); > + struct virtio_device *vdev = _vq->vdev; > + > + if (vdev->map) > + return vdev->map->map_page(vring_mapping_token(vq), > + page, offset, size, > + dir, attrs); > + > + return dma_map_page_attrs(vring_dma_dev(vq), > + page, offset, size, > + dir, attrs); > +} > +EXPORT_SYMBOL_GPL(virtqueue_map_page_attrs); > + > +/** > + * virtqueue_unmap_page_attrs - map a page to the device > + * @_vq: the virtqueue we are talking to > + * @map_handle: the mapped address > + * @size: the buffer size > + * @dir: mapping direction > + * @attrs: unmapping attributes > + */ > +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, > + dma_addr_t map_handle, > + size_t size, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + const struct vring_virtqueue *vq = to_vvq(_vq); > + struct virtio_device *vdev = 
_vq->vdev; > + > + if (vdev->map) > + vdev->map->unmap_page(vring_mapping_token(vq), map_handle, > + size, dir, attrs); > + else > + dma_unmap_page_attrs(vring_dma_dev(vq), map_handle, > + size, dir, attrs); > +} > +EXPORT_SYMBOL_GPL(virtqueue_unmap_page_attrs); > + > /** > * virtqueue_map_single_attrs - map DMA for _vq > * @_vq: the struct virtqueue we're talking about. > @@ -3136,7 +3255,7 @@ EXPORT_SYMBOL_GPL(virtqueue_get_vring); > * The caller calls this to do dma mapping in advance. The DMA address can be > * passed to this _vq when it is in pre-mapped mode. > * > - * return DMA address. Caller should check that by virtqueue_mapping_error(). > + * return mapped address. Caller should check that by > virtqueue_mapping_error(). > */ > dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void *ptr, > size_t size, > @@ -3155,8 +3274,8 @@ dma_addr_t virtqueue_map_single_attrs(const struct > virtqueue *_vq, void *ptr, > "rejecting DMA map of vmalloc memory\n")) > return DMA_MAPPING_ERROR; > > - return dma_map_page_attrs(vring_dma_dev(vq), virt_to_page(ptr), > - offset_in_page(ptr), size, dir, attrs); > + return virtqueue_map_page_attrs(&vq->vq, virt_to_page(ptr), > + offset_in_page(ptr), size, dir, > attrs); > } > EXPORT_SYMBOL_GPL(virtqueue_map_single_attrs); > > @@ -3181,12 +3300,12 @@ void virtqueue_unmap_single_attrs(const struct > virtqueue *_vq, > if (!vq->use_map_api) > return; > > - dma_unmap_page_attrs(vring_dma_dev(vq), addr, size, dir, attrs); > + virtqueue_unmap_page_attrs(_vq, addr, size, dir, attrs); > } > EXPORT_SYMBOL_GPL(virtqueue_unmap_single_attrs); > > /** > - * virtqueue_map_mapping_error - check dma address > + * virtqueue_mapping_error - check dma address > * @_vq: the struct virtqueue we're talking about. 
> * @addr: DMA address > * > @@ -3196,10 +3315,7 @@ int virtqueue_map_mapping_error(const struct virtqueue > *_vq, dma_addr_t addr) > { > const struct vring_virtqueue *vq = to_vvq(_vq); > > - if (!vq->use_map_api) > - return 0; > - > - return dma_mapping_error(vring_dma_dev(vq), addr); > + return vring_mapping_error(vq, addr); > } > EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); > > @@ -3216,11 +3332,15 @@ EXPORT_SYMBOL_GPL(virtqueue_map_mapping_error); > bool virtqueue_map_need_sync(const struct virtqueue *_vq, dma_addr_t addr) > { > const struct vring_virtqueue *vq = to_vvq(_vq); > + struct virtio_device *vdev = _vq->vdev; > > if (!vq->use_map_api) > return false; > > - return dma_need_sync(vring_dma_dev(vq), addr); > + if (vdev->map) > + return vdev->map->need_sync(vring_mapping_token(vq), addr); > + else > + return dma_need_sync(vring_dma_dev(vq), addr); > } > EXPORT_SYMBOL_GPL(virtqueue_map_need_sync); > > @@ -3242,12 +3362,17 @@ void virtqueue_map_sync_single_range_for_cpu(const > struct virtqueue *_vq, > enum dma_data_direction dir) > { > const struct vring_virtqueue *vq = to_vvq(_vq); > - struct device *dev = vring_dma_dev(vq); > + struct virtio_device *vdev = _vq->vdev; > > if (!vq->use_map_api) > return; > > - dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); > + if (vdev->map) > + vdev->map->sync_single_for_cpu(vring_mapping_token(vq), > + addr + offset, size, dir); > + else > + dma_sync_single_range_for_cpu(vring_dma_dev(vq), > + addr, offset, size, dir); > } > EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_cpu); > > @@ -3268,12 +3393,18 @@ void virtqueue_map_sync_single_range_for_device(const > struct virtqueue *_vq, > enum dma_data_direction dir) > { > const struct vring_virtqueue *vq = to_vvq(_vq); > - struct device *dev = vring_dma_dev(vq); > + struct virtio_device *vdev = _vq->vdev; > > if (!vq->use_map_api) > return; > > - dma_sync_single_range_for_device(dev, addr, offset, size, dir); > + if (vdev->map) > + 
vdev->map->sync_single_for_device(vring_mapping_token(vq), > + addr + offset, > + size, dir); > + else > + dma_sync_single_range_for_device(vring_dma_dev(vq), addr, > + offset, size, dir); > } > EXPORT_SYMBOL_GPL(virtqueue_map_sync_single_range_for_device); > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h > index 37029df94aaf..e1973c7b1d1c 100644 > --- a/include/linux/virtio.h > +++ b/include/linux/virtio.h > @@ -44,7 +44,7 @@ union vring_mapping_token { > /* Device that performs DMA */ > struct device *dma_dev; > /* Transport specific token used for doing map */ > - void *opaque; > + void *token; > }; > > int virtqueue_add_outbuf(struct virtqueue *vq, > @@ -165,6 +165,7 @@ struct virtio_device { > struct virtio_device_id id; > const struct virtio_config_ops *config; > const struct vringh_config_ops *vringh_config; > + const struct virtio_map_ops *map; > struct list_head vqs; > u64 features; > void *priv; > @@ -266,6 +267,29 @@ void unregister_virtio_driver(struct virtio_driver *drv); > module_driver(__virtio_driver, register_virtio_driver, \ > unregister_virtio_driver) > > + > +void *virtqueue_map_alloc_coherent(struct virtio_device *vdev, > + union vring_mapping_token *mapping_token, > + size_t size, dma_addr_t *dma_handle, > + gfp_t gfp); > + > +void virtqueue_map_free_coherent(struct virtio_device *vdev, > + union vring_mapping_token *mapping_token, > + size_t size, void *vaddr, > + dma_addr_t dma_handle); > + > +dma_addr_t virtqueue_map_page_attrs(const struct virtqueue *_vq, > + struct page *page, > + unsigned long offset, > + size_t size, > + enum dma_data_direction dir, > + unsigned long attrs); > + > +void virtqueue_unmap_page_attrs(const struct virtqueue *_vq, > + dma_addr_t dma_handle, > + size_t size, enum dma_data_direction dir, > + unsigned long attrs); > + > dma_addr_t virtqueue_map_single_attrs(const struct virtqueue *_vq, void > *ptr, size_t size, > enum dma_data_direction dir, > unsigned long attrs); > void 
virtqueue_unmap_single_attrs(const struct virtqueue *_vq, dma_addr_t > addr, > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h > index b3e1d30c765b..784ce56197c4 100644 > --- a/include/linux/virtio_config.h > +++ b/include/linux/virtio_config.h > @@ -133,6 +133,78 @@ struct virtio_config_ops { > int (*enable_vq_after_reset)(struct virtqueue *vq); > }; > > +/** > + * struct virtio_map_ops - operations for mapping buffer for a virtio device > + * Note: For transport that has its own mapping logic it must > + * implements all of the operations > + * @map_page: map a buffer to the device > + * token: device specific mapping token > + * page: the page that will be mapped by the device > + * offset: the offset in the page for a buffer > + * size: the buffer size > + * dir: mapping direction > + * attrs: mapping attributes > + * Returns: the mapped address > + * @unmap_page: unmap a buffer from the device > + * token: device specific mapping token > + * map_handle: the mapped address > + * size: the buffer size > + * dir: mapping direction > + * attrs: unmapping attributes > + * @sync_single_for_cpu: sync a single buffer from device to cpu > + * token: device specific mapping token > + * map_handle: the mapping address to sync > + * size: the size of the buffer > + * dir: synchronization direction > + * @sync_single_for_device: sync a single buffer from cpu to device > + * token: device specific mapping token > + * map_handle: the mapping address to sync > + * size: the size of the buffer > + * dir: synchronization direction > + * @alloc: alloc a coherent buffer mapping > + * token: device specific mapping token > + * size: the size of the buffer > + * map_handle: the mapping address to sync > + * gfp: allocation flag (GFP_XXX) > + * Returns: virtual address of the allocated buffer > + * @free: free a coherent buffer mapping > + * token: device specific mapping token > + * size: the size of the buffer > + * vaddr: virtual address of the buffer > 
+ * map_handle: the mapping address to sync > + * attrs: unmapping attributes > + * @need_sync: if the buffer needs synchronization > + * token: device specific mapping token > + * map_handle: the mapped address > + * Returns: whether the buffer needs synchronization > + * @mapping_error: if the mapping address is error > + * token: device specific mapping token > + * map_handle: the mapped address > + * @max_mapping_size: get the maximum buffer size that can be mapped > + * token: device specific mapping token > + * Returns: the maximum buffer size that can be mapped > + */ > +struct virtio_map_ops { > + dma_addr_t (*map_page)(void *token, struct page *page, > + unsigned long offset, size_t size, > + enum dma_data_direction dir, unsigned long > attrs); > + void (*unmap_page)(void *token, dma_addr_t map_handle, > + size_t size, enum dma_data_direction dir, > + unsigned long attrs); > + void (*sync_single_for_cpu)(void *token, dma_addr_t map_handle, > + size_t size, enum dma_data_direction dir); > + void (*sync_single_for_device)(void *token, > + dma_addr_t map_handle, size_t size, > + enum dma_data_direction dir); > + void *(*alloc)(void *token, size_t size, > + dma_addr_t *map_handle, gfp_t gfp); > + void (*free)(void *token, size_t size, void *vaddr, > + dma_addr_t map_handle, unsigned long attrs); > + bool (*need_sync)(void *token, dma_addr_t map_handle); > + int (*mapping_error)(void *token, dma_addr_t map_handle); > + size_t (*max_mapping_size)(void *token); > +}; > + > /* If driver didn't advertise the feature, it will never appear. */ > void virtio_check_driver_offered_feature(const struct virtio_device *vdev, > unsigned int fbit); > -- > 2.31.1 >