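dmabuf is a modern Linux kernel feature that allows DMA transfers between two drivers. Common use cases are streaming video devices and NIC-to-GPU transfers. Prior to dmabuf, users had to load proprietary drivers to expose the DMA mappings. With dmabuf, the proprietary drivers are no longer required. This patch adds dma-buf memory region registration to the mlx5 common driver: external memory that is backed by a dma-buf is registered through ibv_reg_dmabuf_mr() instead of the regular MR registration path.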
Signed-off-by: Cliff Burdick <[email protected]> --- drivers/common/mlx5/linux/meson.build | 2 + drivers/common/mlx5/linux/mlx5_common_verbs.c | 48 +++++++- drivers/common/mlx5/linux/mlx5_glue.c | 19 +++ drivers/common/mlx5/linux/mlx5_glue.h | 3 + drivers/common/mlx5/mlx5_common.c | 28 ++++- drivers/common/mlx5/mlx5_common_mr.c | 108 +++++++++++++++++- drivers/common/mlx5/mlx5_common_mr.h | 17 ++- drivers/common/mlx5/windows/mlx5_common_os.c | 8 +- drivers/crypto/mlx5/mlx5_crypto.h | 1 + drivers/crypto/mlx5/mlx5_crypto_gcm.c | 3 +- 10 files changed, 229 insertions(+), 8 deletions(-) diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build index 3767e7a69b..8e83104165 100644 --- a/drivers/common/mlx5/linux/meson.build +++ b/drivers/common/mlx5/linux/meson.build @@ -203,6 +203,8 @@ has_sym_args = [ 'mlx5dv_dr_domain_allow_duplicate_rules' ], [ 'HAVE_MLX5_IBV_REG_MR_IOVA', 'infiniband/verbs.h', 'ibv_reg_mr_iova' ], + [ 'HAVE_IBV_REG_DMABUF_MR', 'infiniband/verbs.h', + 'ibv_reg_dmabuf_mr' ], [ 'HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR', 'infiniband/verbs.h', 'ibv_import_device' ], [ 'HAVE_MLX5DV_DR_ACTION_CREATE_DEST_ROOT_TABLE', 'infiniband/mlx5dv.h', diff --git a/drivers/common/mlx5/linux/mlx5_common_verbs.c b/drivers/common/mlx5/linux/mlx5_common_verbs.c index 98260df470..f6d18fd5df 100644 --- a/drivers/common/mlx5/linux/mlx5_common_verbs.c +++ b/drivers/common/mlx5/linux/mlx5_common_verbs.c @@ -129,6 +129,47 @@ mlx5_common_verbs_reg_mr(void *pd, void *addr, size_t length, return 0; } +/** + * Register mr for dma-buf backed memory. Given protection domain pointer, + * dma-buf fd, offset and length, register the memory region. + * + * @param[in] pd + * Pointer to protection domain context. + * @param[in] offset + * Offset within the dma-buf. + * @param[in] length + * Length of the memory to register. + * @param[in] fd + * File descriptor of the dma-buf. 
+ * @param[out] pmd_mr + * pmd_mr struct set with lkey, address, length and pointer to mr object + * + * @return + * 0 on successful registration, -1 otherwise + */ +RTE_EXPORT_INTERNAL_SYMBOL(mlx5_common_verbs_reg_dmabuf_mr) +int +mlx5_common_verbs_reg_dmabuf_mr(void *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, + struct mlx5_pmd_mr *pmd_mr) +{ + struct ibv_mr *ibv_mr; + ibv_mr = mlx5_glue->reg_dmabuf_mr(pd, offset, length, iova, fd, + IBV_ACCESS_LOCAL_WRITE | + (haswell_broadwell_cpu ? 0 : + IBV_ACCESS_RELAXED_ORDERING)); + if (!ibv_mr) + return -1; + + *pmd_mr = (struct mlx5_pmd_mr){ + .lkey = ibv_mr->lkey, + .addr = ibv_mr->addr, + .len = ibv_mr->length, + .obj = (void *)ibv_mr, + }; + return 0; +} + /** * Deregister mr. Given the mlx5 pmd MR - deregister the MR * @@ -151,13 +192,18 @@ mlx5_common_verbs_dereg_mr(struct mlx5_pmd_mr *pmd_mr) * * @param[out] reg_mr_cb * Pointer to reg_mr func + * @param[out] reg_dmabuf_mr_cb + * Pointer to reg_dmabuf_mr func * @param[out] dereg_mr_cb * Pointer to dereg_mr func */ RTE_EXPORT_INTERNAL_SYMBOL(mlx5_os_set_reg_mr_cb) void -mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, mlx5_dereg_mr_t *dereg_mr_cb) +mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, + mlx5_reg_dmabuf_mr_t *reg_dmabuf_mr_cb, + mlx5_dereg_mr_t *dereg_mr_cb) { *reg_mr_cb = mlx5_common_verbs_reg_mr; + *reg_dmabuf_mr_cb = mlx5_common_verbs_reg_dmabuf_mr; *dereg_mr_cb = mlx5_common_verbs_dereg_mr; } diff --git a/drivers/common/mlx5/linux/mlx5_glue.c b/drivers/common/mlx5/linux/mlx5_glue.c index a91eaa429d..6fac7f2bcd 100644 --- a/drivers/common/mlx5/linux/mlx5_glue.c +++ b/drivers/common/mlx5/linux/mlx5_glue.c @@ -291,6 +291,24 @@ mlx5_glue_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length, #endif } +static struct ibv_mr * +mlx5_glue_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int access) +{ +#ifdef HAVE_IBV_REG_DMABUF_MR + return ibv_reg_dmabuf_mr(pd, offset, length, iova, fd, access); 
+#else + (void)pd; + (void)offset; + (void)length; + (void)iova; + (void)fd; + (void)access; + errno = ENOTSUP; + return NULL; +#endif +} + static struct ibv_mr * mlx5_glue_alloc_null_mr(struct ibv_pd *pd) { @@ -1619,6 +1637,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue) { .modify_qp = mlx5_glue_modify_qp, .reg_mr = mlx5_glue_reg_mr, .reg_mr_iova = mlx5_glue_reg_mr_iova, + .reg_dmabuf_mr = mlx5_glue_reg_dmabuf_mr, .alloc_null_mr = mlx5_glue_alloc_null_mr, .dereg_mr = mlx5_glue_dereg_mr, .create_counter_set = mlx5_glue_create_counter_set, diff --git a/drivers/common/mlx5/linux/mlx5_glue.h b/drivers/common/mlx5/linux/mlx5_glue.h index 81d6b0aaf9..66216d1194 100644 --- a/drivers/common/mlx5/linux/mlx5_glue.h +++ b/drivers/common/mlx5/linux/mlx5_glue.h @@ -219,6 +219,9 @@ struct mlx5_glue { struct ibv_mr *(*reg_mr_iova)(struct ibv_pd *pd, void *addr, size_t length, uint64_t iova, int access); + struct ibv_mr *(*reg_dmabuf_mr)(struct ibv_pd *pd, uint64_t offset, + size_t length, uint64_t iova, + int fd, int access); struct ibv_mr *(*alloc_null_mr)(struct ibv_pd *pd); int (*dereg_mr)(struct ibv_mr *mr); struct ibv_counter_set *(*create_counter_set) diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c index 84a93e7dbd..0ec59b0122 100644 --- a/drivers/common/mlx5/mlx5_common.c +++ b/drivers/common/mlx5/mlx5_common.c @@ -13,6 +13,7 @@ #include <rte_class.h> #include <rte_malloc.h> #include <rte_eal_paging.h> +#include <rte_memory.h> #include "mlx5_common.h" #include "mlx5_common_os.h" @@ -1125,6 +1126,7 @@ mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr, struct mlx5_common_device *dev; struct mlx5_mr_btree *bt; struct mlx5_mr *mr; + struct rte_memseg_list *msl; dev = to_mlx5_device(rte_dev); if (!dev) { @@ -1134,8 +1136,30 @@ mlx5_common_dev_dma_map(struct rte_device *rte_dev, void *addr, rte_errno = ENODEV; return -1; } - mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len, - SOCKET_ID_ANY, 
dev->mr_scache.reg_mr_cb); + /* Check if this is dma-buf backed external memory */ + msl = rte_mem_virt2memseg_list(addr); + if (msl != NULL && msl->external) { + int dmabuf_fd = rte_memseg_list_get_dmabuf_fd_thread_unsafe(msl); + if (dmabuf_fd >= 0) { + uint64_t dmabuf_off; + /* Get base offset from memseg list */ + rte_memseg_list_get_dmabuf_offset_thread_unsafe(msl, &dmabuf_off); + /* Calculate offset within dmabuf for this specific address */ + dmabuf_off += ((uintptr_t)addr - (uintptr_t)msl->base_va); + /* Use dma-buf MR registration */ + mr = mlx5_create_mr_ext_dmabuf(dev->pd, (uintptr_t)addr, len, + SOCKET_ID_ANY, dmabuf_fd, dmabuf_off, + dev->mr_scache.reg_dmabuf_mr_cb); + } else { + /* Use regular MR registration */ + mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len, + SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb); + } + } else { + /* Use regular MR registration */ + mr = mlx5_create_mr_ext(dev->pd, (uintptr_t)addr, len, + SOCKET_ID_ANY, dev->mr_scache.reg_mr_cb); + } if (!mr) { DRV_LOG(WARNING, "Device %s unable to DMA map", rte_dev->name); rte_errno = EINVAL; diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c index 8ed988dec9..18b8a6eaa5 100644 --- a/drivers/common/mlx5/mlx5_common_mr.c +++ b/drivers/common/mlx5/mlx5_common_mr.c @@ -8,6 +8,7 @@ #include <rte_eal_memconfig.h> #include <rte_eal_paging.h> #include <rte_errno.h> +#include <rte_memory.h> #include <rte_mempool.h> #include <rte_malloc.h> #include <rte_rwlock.h> @@ -1141,6 +1142,7 @@ mlx5_mr_create_cache(struct mlx5_mr_share_cache *share_cache, int socket) { /* Set the reg_mr and dereg_mr callback functions */ mlx5_os_set_reg_mr_cb(&share_cache->reg_mr_cb, + &share_cache->reg_dmabuf_mr_cb, &share_cache->dereg_mr_cb); rte_rwlock_init(&share_cache->rwlock); rte_rwlock_init(&share_cache->mprwlock); @@ -1221,6 +1223,74 @@ mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id, return mr; } +/** + * Creates a memory region for dma-buf backed 
external memory. + * + * @param pd + * Pointer to pd of a device (net, regex, vdpa,...). + * @param addr + * Starting virtual address of memory (mmap'd address). + * @param len + * Length of memory segment being mapped. + * @param socket_id + * Socket to allocate heap memory for the control structures. + * @param dmabuf_fd + * File descriptor of the dma-buf. + * @param dmabuf_offset + * Offset within the dma-buf. + * @param reg_dmabuf_mr_cb + * Callback function for dma-buf MR registration. + * + * @return + * Pointer to MR structure on success, NULL otherwise. + */ +struct mlx5_mr * +mlx5_create_mr_ext_dmabuf(void *pd, uintptr_t addr, size_t len, int socket_id, + int dmabuf_fd, uint64_t dmabuf_offset, + mlx5_reg_dmabuf_mr_t reg_dmabuf_mr_cb) +{ + struct mlx5_mr *mr = NULL; + + if (reg_dmabuf_mr_cb == NULL) { + DRV_LOG(WARNING, "dma-buf MR registration not supported"); + rte_errno = ENOTSUP; + return NULL; + } + mr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, + RTE_ALIGN_CEIL(sizeof(*mr), RTE_CACHE_LINE_SIZE), + RTE_CACHE_LINE_SIZE, socket_id); + if (mr == NULL) + return NULL; + if (reg_dmabuf_mr_cb(pd, dmabuf_offset, len, addr, dmabuf_fd, + &mr->pmd_mr) < 0) { + DRV_LOG(WARNING, + "Fail to create dma-buf MR for address (%p) fd=%d", + (void *)addr, dmabuf_fd); + mlx5_free(mr); + return NULL; + } + mr->msl = NULL; /* Mark it is external memory. */ + mr->ms_bmp = NULL; + mr->ms_n = 1; + mr->ms_bmp_n = 1; + /* + * For dma-buf MR, the returned addr may be NULL since there's no VA + * in the registration. Store the user-provided addr for cache lookup. 
+ */ + if (mr->pmd_mr.addr == NULL) + mr->pmd_mr.addr = (void *)addr; + if (mr->pmd_mr.len == 0) + mr->pmd_mr.len = len; + DRV_LOG(DEBUG, + "MR CREATED (%p) for dma-buf external memory %p (fd=%d):\n" + " [0x%" PRIxPTR ", 0x%" PRIxPTR ")," + " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u", + (void *)mr, (void *)addr, dmabuf_fd, + addr, addr + len, rte_cpu_to_be_32(mr->pmd_mr.lkey), + mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n); + return mr; +} + /** * Callback for memory free event. Iterate freed memsegs and check whether it * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a @@ -1747,9 +1817,43 @@ mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache, struct mlx5_mempool_mr *mr = &new_mpr->mrs[i]; const struct mlx5_range *range = &ranges[i]; size_t len = range->end - range->start; + struct rte_memseg_list *msl; + int reg_result; + + /* Check if this is dma-buf backed external memory */ + msl = rte_mem_virt2memseg_list((void *)range->start); + if (msl != NULL && msl->external && + share_cache->reg_dmabuf_mr_cb != NULL) { + int dmabuf_fd = rte_memseg_list_get_dmabuf_fd_thread_unsafe(msl); + if (dmabuf_fd >= 0) { + uint64_t dmabuf_off; + /* Get base offset from memseg list */ + rte_memseg_list_get_dmabuf_offset_thread_unsafe(msl, &dmabuf_off); + /* Calculate offset within dmabuf for this specific range */ + dmabuf_off += (range->start - (uintptr_t)msl->base_va); + /* Use dma-buf MR registration */ + reg_result = share_cache->reg_dmabuf_mr_cb(pd, + dmabuf_off, len, range->start, dmabuf_fd, + &mr->pmd_mr); + if (reg_result == 0) { + /* For dma-buf MR, set addr if not set by driver */ + if (mr->pmd_mr.addr == NULL) + mr->pmd_mr.addr = (void *)range->start; + if (mr->pmd_mr.len == 0) + mr->pmd_mr.len = len; + } + } else { + /* Use regular MR registration */ + reg_result = share_cache->reg_mr_cb(pd, + (void *)range->start, len, &mr->pmd_mr); + } + } else { + /* Use regular MR registration */ + reg_result = share_cache->reg_mr_cb(pd, + 
(void *)range->start, len, &mr->pmd_mr); + } - if (share_cache->reg_mr_cb(pd, (void *)range->start, len, - &mr->pmd_mr) < 0) { + if (reg_result < 0) { DRV_LOG(ERR, "Failed to create an MR in PD %p for address range " "[0x%" PRIxPTR ", 0x%" PRIxPTR "] (%zu bytes) for mempool %s", diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h index cf7c685e9b..3b967b1323 100644 --- a/drivers/common/mlx5/mlx5_common_mr.h +++ b/drivers/common/mlx5/mlx5_common_mr.h @@ -35,6 +35,9 @@ struct mlx5_pmd_mr { */ typedef int (*mlx5_reg_mr_t)(void *pd, void *addr, size_t length, struct mlx5_pmd_mr *pmd_mr); +typedef int (*mlx5_reg_dmabuf_mr_t)(void *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, + struct mlx5_pmd_mr *pmd_mr); typedef void (*mlx5_dereg_mr_t)(struct mlx5_pmd_mr *pmd_mr); /* Memory Region object. */ @@ -87,6 +90,7 @@ struct __rte_packed_begin mlx5_mr_share_cache { struct mlx5_mr_list mr_free_list; /* Freed MR list. */ struct mlx5_mempool_reg_list mempool_reg_list; /* Mempool database. 
*/ mlx5_reg_mr_t reg_mr_cb; /* Callback to reg_mr func */ + mlx5_reg_dmabuf_mr_t reg_dmabuf_mr_cb; /* Callback to reg_dmabuf_mr func */ mlx5_dereg_mr_t dereg_mr_cb; /* Callback to dereg_mr func */ } __rte_packed_end; @@ -233,6 +237,10 @@ mlx5_mr_lookup_list(struct mlx5_mr_share_cache *share_cache, struct mlx5_mr * mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id, mlx5_reg_mr_t reg_mr_cb); +struct mlx5_mr * +mlx5_create_mr_ext_dmabuf(void *pd, uintptr_t addr, size_t len, int socket_id, + int dmabuf_fd, uint64_t dmabuf_offset, + mlx5_reg_dmabuf_mr_t reg_dmabuf_mr_cb); void mlx5_mr_free(struct mlx5_mr *mr, mlx5_dereg_mr_t dereg_mr_cb); __rte_internal uint32_t @@ -251,12 +259,19 @@ int mlx5_common_verbs_reg_mr(void *pd, void *addr, size_t length, struct mlx5_pmd_mr *pmd_mr); __rte_internal +int +mlx5_common_verbs_reg_dmabuf_mr(void *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, + struct mlx5_pmd_mr *pmd_mr); +__rte_internal void mlx5_common_verbs_dereg_mr(struct mlx5_pmd_mr *pmd_mr); __rte_internal void -mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, mlx5_dereg_mr_t *dereg_mr_cb); +mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, + mlx5_reg_dmabuf_mr_t *reg_dmabuf_mr_cb, + mlx5_dereg_mr_t *dereg_mr_cb); __rte_internal int diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c b/drivers/common/mlx5/windows/mlx5_common_os.c index 7fac361460..5e284742ab 100644 --- a/drivers/common/mlx5/windows/mlx5_common_os.c +++ b/drivers/common/mlx5/windows/mlx5_common_os.c @@ -17,6 +17,7 @@ #include "mlx5_common.h" #include "mlx5_common_os.h" #include "mlx5_malloc.h" +#include "mlx5_common_mr.h" /** * Initialization routine for run-time dependency on external lib. 
@@ -442,15 +443,20 @@ mlx5_os_dereg_mr(struct mlx5_pmd_mr *pmd_mr) * * @param[out] reg_mr_cb * Pointer to reg_mr func + * @param[out] reg_dmabuf_mr_cb + * Pointer to reg_dmabuf_mr func (NULL on Windows - not supported) * @param[out] dereg_mr_cb * Pointer to dereg_mr func * */ RTE_EXPORT_INTERNAL_SYMBOL(mlx5_os_set_reg_mr_cb) void -mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, mlx5_dereg_mr_t *dereg_mr_cb) +mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, + mlx5_reg_dmabuf_mr_t *reg_dmabuf_mr_cb, + mlx5_dereg_mr_t *dereg_mr_cb) { *reg_mr_cb = mlx5_os_reg_mr; + *reg_dmabuf_mr_cb = NULL; /* dma-buf not supported on Windows */ *dereg_mr_cb = mlx5_os_dereg_mr; } diff --git a/drivers/crypto/mlx5/mlx5_crypto.h b/drivers/crypto/mlx5/mlx5_crypto.h index f9f127e9e6..b2712c9a8d 100644 --- a/drivers/crypto/mlx5/mlx5_crypto.h +++ b/drivers/crypto/mlx5/mlx5_crypto.h @@ -41,6 +41,7 @@ struct mlx5_crypto_priv { struct mlx5_common_device *cdev; /* Backend mlx5 device. */ struct rte_cryptodev *crypto_dev; mlx5_reg_mr_t reg_mr_cb; /* Callback to reg_mr func */ + mlx5_reg_dmabuf_mr_t reg_dmabuf_mr_cb; /* Callback to reg_dmabuf_mr func */ mlx5_dereg_mr_t dereg_mr_cb; /* Callback to dereg_mr func */ struct mlx5_uar uar; /* User Access Region. */ uint32_t max_segs_num; /* Maximum supported data segs. */ diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c b/drivers/crypto/mlx5/mlx5_crypto_gcm.c index 89f32c7722..380689cfeb 100644 --- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c +++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c @@ -1186,7 +1186,8 @@ mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv) /* Override AES-GCM specified ops. 
*/ dev_ops->sym_session_configure = mlx5_crypto_sym_gcm_session_configure; - mlx5_os_set_reg_mr_cb(&priv->reg_mr_cb, &priv->dereg_mr_cb); + mlx5_os_set_reg_mr_cb(&priv->reg_mr_cb, &priv->reg_dmabuf_mr_cb, + &priv->dereg_mr_cb); dev_ops->queue_pair_setup = mlx5_crypto_gcm_qp_setup; dev_ops->queue_pair_release = mlx5_crypto_gcm_qp_release; if (mlx5_crypto_is_ipsec_opt(priv)) { -- 2.52.0

