Anatoly Burakov <anatoly.bura...@intel.com> writes:

> Currently, there is no way to map memory for DMA in a way that allows
> unmapping it partially later, because some IOMMUs do not support
> partial unmapping. There is a workaround of mapping each of these
> segments separately, but this is inconvenient and silly, so this
> commit adds a proper API that does it.
>
> This commit relies on earlier infrastructure that was built out to
> support "chunking", as the concept of a "chunk" is essentially the
> same as that of a page size.
>
> Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com>
> ---
>
> Notes:
>     v2:
>     - Added checks for page alignment of supplied values
>     - Added rte_errno values (similar to patch 103165 [1])
>     
>     [1] 
> http://patches.dpdk.org/project/dpdk/patch/e8c5e7ba089e2283c3cd67e4529e52fe49390eb9.1635428963.git.anatoly.bura...@intel.com/
>
>  lib/eal/freebsd/eal.c      |  11 ++++
>  lib/eal/include/rte_vfio.h |  33 ++++++++++++
>  lib/eal/linux/eal_vfio.c   | 108 +++++++++++++++++++++++++++++++------
>  lib/eal/version.map        |   1 +
>  lib/eal/windows/eal.c      |  11 ++++
>  5 files changed, 148 insertions(+), 16 deletions(-)
>
> diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
> index 9935356ed4..5ca5fa8e11 100644
> --- a/lib/eal/freebsd/eal.c
> +++ b/lib/eal/freebsd/eal.c
> @@ -1090,6 +1090,17 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
>       return -1;
>  }
>  
> +int
> +rte_vfio_container_dma_map_paged(__rte_unused int container_fd,
> +             __rte_unused uint64_t vaddr,
> +             __rte_unused uint64_t iova,
> +             __rte_unused uint64_t len,
> +             __rte_unused uint64_t pagesz)
> +{
> +     rte_errno = ENOTSUP;
> +     return -1;
> +}
> +
>  int
>  rte_vfio_container_dma_unmap(__rte_unused int container_fd,
>                       __rte_unused uint64_t vaddr,
> diff --git a/lib/eal/include/rte_vfio.h b/lib/eal/include/rte_vfio.h
> index 2d90b36480..6afae2ccce 100644
> --- a/lib/eal/include/rte_vfio.h
> +++ b/lib/eal/include/rte_vfio.h
> @@ -17,6 +17,8 @@ extern "C" {
>  #include <stdbool.h>
>  #include <stdint.h>
>  
> +#include <rte_compat.h>
> +
>  /*
>   * determine if VFIO is present on the system
>   */
> @@ -331,6 +333,37 @@ int
>  rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
>               uint64_t iova, uint64_t len);
>  
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Perform DMA mapping for devices in a container, mapping memory page-by-page.
> + *
> + * @param container_fd
> + *   the specified container fd. Use RTE_VFIO_DEFAULT_CONTAINER_FD to
> + *   use the default container.
> + *
> + * @param vaddr
> + *   Starting virtual address of memory to be mapped.
> + *
> + * @param iova
> + *   Starting IOVA address of memory to be mapped.
> + *
> + * @param len
> + *   Length of memory segment being mapped.
> + *
> + * @param pagesz
> + *   Page size of the underlying memory.
> + *
> + * @return
> + *    0 if successful
> + *   <0 if failed (rte_errno is set)
> + */
> +__rte_experimental
> +int
> +rte_vfio_container_dma_map_paged(int container_fd, uint64_t vaddr,
> +             uint64_t iova, uint64_t len, uint64_t pagesz);
> +
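
A minimal usage sketch, for anyone reading along (the pointer, IOVA and page
size below are made up and assume a region backed by 2 MB hugepages;
RTE_PGSIZE_2M comes from rte_memory.h):

    uint64_t pgsz = RTE_PGSIZE_2M;
    uint64_t len = 4 * pgsz;    /* four pages */

    if (rte_vfio_container_dma_map_paged(RTE_VFIO_DEFAULT_CONTAINER_FD,
            (uint64_t)va, iova, len, pgsz) < 0) {
        /* rte_errno holds the cause, e.g. EINVAL for misaligned arguments */
        return -1;
    }

As I read it, the point is that a later rte_vfio_container_dma_unmap() of any
pgsz-aligned sub-range should then succeed even on IOMMUs without partial
unmap support.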
>  /**
>   * Perform DMA unmapping for devices in a container.
>   *
> diff --git a/lib/eal/linux/eal_vfio.c b/lib/eal/linux/eal_vfio.c
> index aa2087a2da..0f5b6c14af 100644
> --- a/lib/eal/linux/eal_vfio.c
> +++ b/lib/eal/linux/eal_vfio.c
> @@ -1872,11 +1872,12 @@ vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
>  
>  static int
>  container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
> -             uint64_t len)
> +             uint64_t len, uint64_t pagesz)
>  {
>       struct user_mem_map *new_map;
>       struct user_mem_maps *user_mem_maps;
>       bool has_partial_unmap;
> +     uint64_t chunk_size;
>       int ret = 0;
>  
>       user_mem_maps = &vfio_cfg->mem_maps;
> @@ -1887,19 +1888,37 @@ container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
>               ret = -1;
>               goto out;
>       }
> -     /* map the entry */
> -     if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
> -             /* technically, this will fail if there are currently no devices
> -              * plugged in, even if a device were added later, this mapping
> -              * might have succeeded. however, since we cannot verify if this
> -              * is a valid mapping without having a device attached, consider
> -              * this to be unsupported, because we can't just store any old
> -              * mapping and pollute list of active mappings willy-nilly.
> -              */
> -             RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
> -             ret = -1;
> -             goto out;
> +
> +     /* technically, mapping will fail if there are currently no devices
> +      * plugged in, even though this mapping might have succeeded if a
> +      * device were added later. however, since we cannot verify that this
> +      * is a valid mapping without having a device attached, consider it
> +      * unsupported, because we can't just store any old mapping and
> +      * pollute the list of active mappings willy-nilly.
> +      */
> +
> +     /* if page size was not specified, map the entire segment in one go */
> +     if (pagesz == 0) {
> +             if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
> +                     RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
> +                     ret = -1;
> +                     goto out;
> +             }
> +     } else {
> +             /* otherwise, do mappings page-by-page */
> +             uint64_t offset;
> +
> +             for (offset = 0; offset < len; offset += pagesz) {
> +                     uint64_t va = vaddr + offset;
> +                     uint64_t io = iova + offset;
> +                     if (vfio_dma_mem_map(vfio_cfg, va, io, pagesz, 1)) {
> +                     RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
> +                             ret = -1;
> +                             goto out;
> +                     }
> +             }
>       }
> +
>       /* do we have partial unmap support? */
>       has_partial_unmap = vfio_cfg->vfio_iommu_type->partial_unmap;
>  
> @@ -1908,8 +1927,18 @@ container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
>       new_map->addr = vaddr;
>       new_map->iova = iova;
>       new_map->len = len;
> -     /* for IOMMU types supporting partial unmap, we don't need chunking */
> -     new_map->chunk = has_partial_unmap ? 0 : len;
> +
> +     /*
> +      * Chunking serves essentially the same purpose as page size, so for
> +      * the purposes of this calculation we treat them as the same. The
> +      * reason we have a page size at all is to allow partial unmapping
> +      * later. Therefore, when the IOMMU supports partial unmap, the page
> +      * size is irrelevant and can be ignored. For IOMMUs that don't
> +      * support partial unmap, the page size is equivalent to the chunk
> +      * size.
> +      */
> +     chunk_size = pagesz == 0 ? len : pagesz;
> +     new_map->chunk = has_partial_unmap ? 0 : chunk_size;
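
To illustrate the bookkeeping with made-up numbers: mapping 8 MB worth of 2 MB
pages on an IOMMU without partial unmap support ends up storing

    new_map->len   = 8 << 20;   /* the region stays a single entry */
    new_map->chunk = 2 << 20;   /* but can be unmapped in 2 MB units */

whereas with partial unmap support chunk stays 0 and the supplied page size
has no effect on the stored mapping.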
>  
>       compact_user_maps(user_mem_maps);
>  out:
> @@ -2179,7 +2208,44 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
>               return -1;
>       }
>  
> -     return container_dma_map(vfio_cfg, vaddr, iova, len);
> +     /* not having page size means we map entire segment */
> +     return container_dma_map(vfio_cfg, vaddr, iova, len, 0);
> +}
> +
> +int
> +rte_vfio_container_dma_map_paged(int container_fd, uint64_t vaddr,
> +             uint64_t iova, uint64_t len, uint64_t pagesz)
> +{
> +     struct vfio_config *vfio_cfg;
> +     bool va_aligned, iova_aligned, len_aligned, pagesz_aligned;
> +
> +     /* avoid divide by zero first */
> +     if (len == 0 || pagesz == 0) {
> +             rte_errno = EINVAL;
> +             return -1;
> +     }
> +     /*
> +      * for direct maps, we leave it up to the kernel to decide whether the
> +      * parameters are valid, but since we are going to map page-by-page and
> +      * are expecting certain guarantees from the parameters we receive, we
> +      * are going to check those here.
> +      */
> +     pagesz_aligned = rte_align64pow2(pagesz) == pagesz;
> +     len_aligned = (len % pagesz) == 0;
> +     va_aligned = RTE_ALIGN(vaddr, pagesz) == vaddr;
> +     iova_aligned = RTE_ALIGN(iova, pagesz) == iova;
> +     if (!pagesz_aligned || !len_aligned || !va_aligned || !iova_aligned) {
> +             rte_errno = EINVAL;
> +             return -1;
> +     }
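
To make the expected constraints concrete, two illustrative calls (values are
hypothetical, default container):

    /* passes the checks: vaddr, iova and len are multiples of the 2 MB pagesz */
    rte_vfio_container_dma_map_paged(RTE_VFIO_DEFAULT_CONTAINER_FD,
            0x7f0000200000, 0x200000, 0x400000, 0x200000);

    /* returns -1 with rte_errno == EINVAL: len is not a multiple of pagesz */
    rte_vfio_container_dma_map_paged(RTE_VFIO_DEFAULT_CONTAINER_FD,
            0x7f0000200000, 0x200000, 0x300000, 0x200000);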
> +
> +     vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
> +     if (vfio_cfg == NULL) {
> +             RTE_LOG(ERR, EAL, "Invalid VFIO container fd\n");
> +             return -1;
> +     }
> +
> +     return container_dma_map(vfio_cfg, vaddr, iova, len, pagesz);
>  }
>  
>  int
> @@ -2299,6 +2365,16 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
>       return -1;
>  }
>  
> +int
> +rte_vfio_container_dma_map_paged(__rte_unused int container_fd,
> +             __rte_unused uint64_t vaddr,
> +             __rte_unused uint64_t iova,
> +             __rte_unused uint64_t len,
> +             __rte_unused uint64_t pagesz)
> +{
> +     return -1;
> +}
> +
>  int
>  rte_vfio_container_dma_unmap(__rte_unused int container_fd,
>               __rte_unused uint64_t vaddr,
> diff --git a/lib/eal/version.map b/lib/eal/version.map
> index 9d43655b66..82a34c2014 100644
> --- a/lib/eal/version.map
> +++ b/lib/eal/version.map
> @@ -420,6 +420,7 @@ EXPERIMENTAL {
>       rte_intr_instance_free;
>       rte_intr_type_get;
>       rte_intr_type_set;

Please annotate with the target release in a comment.
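Something along the lines of the other entries in this section, e.g. (the
release number below is only a placeholder for whatever this ends up
targeting):

    # added in 22.03
    rte_vfio_container_dma_map_paged;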

> +     rte_vfio_container_dma_map_paged;
>  };
>  
>  INTERNAL {
> diff --git a/lib/eal/windows/eal.c b/lib/eal/windows/eal.c
> index f7ce1b6671..93771cb230 100644
> --- a/lib/eal/windows/eal.c
> +++ b/lib/eal/windows/eal.c
> @@ -484,6 +484,17 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
>       return -1;
>  }
>  
> +int
> +rte_vfio_container_dma_map_paged(__rte_unused int container_fd,
> +             __rte_unused uint64_t vaddr,
> +             __rte_unused uint64_t iova,
> +             __rte_unused uint64_t len,
> +             __rte_unused uint64_t pagesz)
> +{
> +     rte_errno = ENOTSUP;
> +     return -1;
> +}
> +
>  int
>  rte_vfio_container_dma_unmap(__rte_unused int container_fd,
>                       __rte_unused uint64_t vaddr,


-- 
Regards, Ray K
