Query dma-buf TPH metadata when registering a dma-buf MR for peer-to-peer access to a PCIe endpoint and use it to program requester-side TPH on the outbound mkey. If the exporter has no metadata, fall back to the existing no-TPH path.
For TPH-backed FRMRs, make the extra ST-table reference belong to the hardware mkey handle rather than the transient MR object. Extend the FRMR pool API so reuse and final destroy can transfer and drop that ref at the handle lifetime boundaries, and add mlx5_st_get_index() to take a ref on an already-known ST index. Also decode PH from kernel_vendor_key when recreating pooled mkeys so the requester hint matches the pool key. Signed-off-by: Zhiping Zhang <[email protected]> --- drivers/infiniband/core/frmr_pools.c | 20 +++- drivers/infiniband/hw/mlx5/mr.c | 111 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/lib/st.c | 49 ++++++-- include/linux/mlx5/driver.h | 12 ++ include/rdma/frmr_pools.h | 5 +- 5 files changed, 178 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c index 5e992ff3d7cf..61a77847118e 100644 --- a/drivers/infiniband/core/frmr_pools.c +++ b/drivers/infiniband/core/frmr_pools.c @@ -92,7 +92,8 @@ static void destroy_all_handles_in_queue(struct ib_device *device, u32 count; while (pop_frmr_handles_page(pool, queue, &page, &count)) { - pools->pool_ops->destroy_frmrs(device, page->handles, count); + pools->pool_ops->destroy_frmrs(device, &pool->key, + page->handles, count); kfree(page); } } @@ -136,7 +137,8 @@ static bool age_pinned_pool(struct ib_device *device, struct ib_frmr_pool *pool) spin_unlock(&pool->lock); if (destroyed) - pools->pool_ops->destroy_frmrs(device, handles, destroyed); + pools->pool_ops->destroy_frmrs(device, &pool->key, handles, + destroyed); kfree(handles); return has_work; } @@ -453,9 +455,11 @@ int ib_frmr_pools_set_pinned(struct ib_device *device, struct ib_frmr_key *key, } static int get_frmr_from_pool(struct ib_device *device, - struct ib_frmr_pool *pool, struct ib_mr *mr) + struct ib_frmr_pool *pool, struct ib_mr *mr, + bool *reused) { struct ib_frmr_pools *pools = device->frmr_pools; + bool local_reused = false; u32 handle; int err; @@ -464,6 +468,7 
@@ static int get_frmr_from_pool(struct ib_device *device, if (pool->inactive_queue.ci > 0) { handle = pop_handle_from_queue_locked( &pool->inactive_queue); + local_reused = true; } else { spin_unlock(&pool->lock); err = pools->pool_ops->create_frmrs(device, &pool->key, @@ -474,6 +479,7 @@ static int get_frmr_from_pool(struct ib_device *device, } } else { handle = pop_handle_from_queue_locked(&pool->queue); + local_reused = true; } pool->in_use++; @@ -484,6 +490,8 @@ static int get_frmr_from_pool(struct ib_device *device, mr->frmr.pool = pool; mr->frmr.handle = handle; + if (reused) + *reused = local_reused; return 0; } @@ -493,10 +501,12 @@ static int get_frmr_from_pool(struct ib_device *device, * * @device: The device to pop the FRMR handle from. * @mr: The MR to pop the FRMR handle from. + * @reused: Optional output that reports whether the returned handle was + * reused from the pool instead of freshly created. * * Returns 0 on success, negative error code on failure. */ -int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr) +int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr, bool *reused) { struct ib_frmr_pools *pools = device->frmr_pools; struct ib_frmr_pool *pool; @@ -509,7 +519,7 @@ int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr) return PTR_ERR(pool); } - return get_frmr_from_pool(device, pool, mr); + return get_frmr_from_pool(device, pool, mr, reused); } EXPORT_SYMBOL(ib_frmr_pool_pop); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3b6da45061a5..5697c2862615 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -38,6 +38,7 @@ #include <linux/delay.h> #include <linux/dma-buf.h> #include <linux/dma-resv.h> +#include <linux/pci-tph.h> #include <rdma/frmr_pools.h> #include <rdma/ib_umem_odp.h> #include "dm.h" @@ -167,12 +168,39 @@ static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, #define MLX5_FRMR_POOLS_KERNEL_KEY_PH_MASK 0xFF0000 
#define MLX5_FRMR_POOLS_KERNEL_KEY_ST_INDEX_MASK 0xFFFF +static int mlx5_ib_get_frmr_st_handle_ref(struct mlx5_ib_dev *dev, + u16 st_index) +{ + if (st_index == MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) + return 0; + + return mlx5_st_get_index(dev->mdev, st_index); +} + +static void mlx5_ib_put_st_index_ref(struct mlx5_ib_dev *dev, u16 st_index) +{ + if (st_index == MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) + return; + + mlx5_st_dealloc_index(dev->mdev, st_index); +} + +static void mlx5_ib_put_frmr_st_handle_ref(struct mlx5_ib_dev *dev, + u64 kernel_vendor_key) +{ + u16 st_index = kernel_vendor_key & + MLX5_FRMR_POOLS_KERNEL_KEY_ST_INDEX_MASK; + + mlx5_ib_put_st_index_ref(dev, st_index); +} + static struct mlx5_ib_mr * _mlx5_frmr_pool_alloc(struct mlx5_ib_dev *dev, struct ib_umem *umem, int access_flags, int access_mode, unsigned long page_size, u16 st_index, u8 ph) { struct mlx5_ib_mr *mr; + bool reused = false; int err; mr = kzalloc_obj(*mr); @@ -195,11 +223,14 @@ _mlx5_frmr_pool_alloc(struct mlx5_ib_dev *dev, struct ib_umem *umem, mr->ibmr.frmr.key.kernel_vendor_key = st_index | (ph << MLX5_FRMR_POOLS_KERNEL_KEY_PH_SHIFT); - err = ib_frmr_pool_pop(&dev->ib_dev, &mr->ibmr); + err = ib_frmr_pool_pop(&dev->ib_dev, &mr->ibmr, &reused); if (err) { kfree(mr); return ERR_PTR(err); } + if (reused) + mlx5_ib_put_frmr_st_handle_ref( + dev, mr->ibmr.frmr.key.kernel_vendor_key); mr->mmkey.key = mr->ibmr.frmr.handle; init_waitqueue_head(&mr->mmkey.wait); @@ -229,7 +260,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, init_waitqueue_head(&mr->mmkey.wait); mr->ibmr.frmr.key = key; - ret = ib_frmr_pool_pop(&dev->ib_dev, &mr->ibmr); + ret = ib_frmr_pool_pop(&dev->ib_dev, &mr->ibmr, NULL); if (ret) { kfree(mr); return ERR_PTR(ret); @@ -273,7 +304,8 @@ static int mlx5r_create_mkeys(struct ib_device *device, struct ib_frmr_key *key, st_index = key->kernel_vendor_key & MLX5_FRMR_POOLS_KERNEL_KEY_ST_INDEX_MASK; - ph = key->kernel_vendor_key & 
MLX5_FRMR_POOLS_KERNEL_KEY_PH_MASK; + ph = (key->kernel_vendor_key & MLX5_FRMR_POOLS_KERNEL_KEY_PH_MASK) >> + MLX5_FRMR_POOLS_KERNEL_KEY_PH_SHIFT; if (ph) { /* Normalize ph: swap MLX5_IB_NO_PH for 0 */ if (ph == MLX5_IB_NO_PH) @@ -299,7 +331,8 @@ static int mlx5r_create_mkeys(struct ib_device *device, struct ib_frmr_key *key, return err; } -static void mlx5r_destroy_mkeys(struct ib_device *device, u32 *handles, +static void mlx5r_destroy_mkeys(struct ib_device *device, + const struct ib_frmr_key *key, u32 *handles, unsigned int count) { struct mlx5_ib_dev *dev = to_mdev(device); @@ -311,6 +344,9 @@ static void mlx5r_destroy_mkeys(struct ib_device *device, u32 *handles, pr_warn_ratelimited( "mlx5_ib: failed to destroy mkey %d: %d", handles[i], err); + else + mlx5_ib_put_frmr_st_handle_ref(dev, + key->kernel_vendor_key); } } @@ -333,6 +369,7 @@ static int mlx5r_build_frmr_key(struct ib_device *device, get_unchangeable_access_flags(dev, in->access_flags); out->vendor_key = in->vendor_key; out->num_dma_blocks = in->num_dma_blocks; + out->kernel_vendor_key = in->kernel_vendor_key; return 0; } @@ -753,6 +790,12 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); if (xlt_with_umr) { + err = mlx5_ib_get_frmr_st_handle_ref(dev, st_index); + if (err) { + ib_umem_release(umem); + return ERR_PTR(err); + } + mr = alloc_cacheable_mr(pd, umem, iova, access_flags, MLX5_MKC_ACCESS_MODE_MTT, st_index, ph); @@ -767,6 +810,8 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, mutex_unlock(&dev->slow_path_mutex); } if (IS_ERR(mr)) { + if (xlt_with_umr) + mlx5_ib_put_st_index_ref(dev, st_index); ib_umem_release(umem); return ERR_CAST(mr); } @@ -899,6 +944,52 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { .invalidate_mappings = mlx5_ib_dmabuf_invalidate_cb, }; +static void get_tph_mr_dmabuf(struct mlx5_ib_dev *dev, struct dma_buf *dmabuf, + u16 *st_index, u8 
*ph) +{ + u16 local_st_index; + u16 steering_tag; + u8 local_ph; + bool extended; + int ret; + + switch (pcie_tph_enabled_req_type(dev->mdev->pdev)) { + case PCI_TPH_REQ_TPH_ONLY: + extended = false; + break; + case PCI_TPH_REQ_EXT_TPH: + extended = true; + break; + default: + return; + } + + dma_resv_lock(dmabuf->resv, NULL); + ret = dma_buf_get_tph(dmabuf, extended, &steering_tag, &local_ph); + dma_resv_unlock(dmabuf->resv); + if (ret) { + if (ret != -EOPNOTSUPP) + mlx5_ib_dbg(dev, "get_tph failed (%d)\n", ret); + return; + } + + ret = mlx5_st_alloc_index_by_tag(dev->mdev, steering_tag, + &local_st_index); + if (ret) { + mlx5_ib_dbg(dev, "st_alloc_index_by_tag failed (%d)\n", ret); + return; + } + + *st_index = local_st_index; + *ph = local_ph; +} + +static void mlx5_ib_mr_put_frmr_st_handle_ref(struct mlx5_ib_mr *mr) +{ + mlx5_ib_put_frmr_st_handle_ref(mr_to_mdev(mr), + mr->ibmr.frmr.key.kernel_vendor_key); +} + static struct ib_mr * reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, u64 offset, u64 length, u64 virt_addr, @@ -941,12 +1032,22 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, ph = dmah->ph; if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) st_index = mdmah->st_index; + + err = mlx5_ib_get_frmr_st_handle_ref(dev, st_index); + if (err) { + ib_umem_release(&umem_dmabuf->umem); + return ERR_PTR(err); + } + } else { + get_tph_mr_dmabuf(dev, umem_dmabuf->attach->dmabuf, + &st_index, &ph); } mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, access_flags, access_mode, st_index, ph); if (IS_ERR(mr)) { + mlx5_ib_put_st_index_ref(dev, st_index); ib_umem_release(&umem_dmabuf->umem); return ERR_CAST(mr); } @@ -1400,6 +1501,8 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) dma_resv_unlock( to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } + if (!ret) + mlx5_ib_mr_put_frmr_st_handle_ref(mr); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c index 7cedc348790d..877b37b4e639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c @@ -92,23 +92,18 @@ void mlx5_st_destroy(struct mlx5_core_dev *dev) kfree(st); } -int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, - unsigned int cpu_uid, u16 *st_index) +int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, u16 tag, + u16 *st_index) { struct mlx5_st_idx_data *idx_data; struct mlx5_st *st = dev->st; unsigned long index; u32 xa_id; - u16 tag; - int ret; + int ret = 0; if (!st) return -EOPNOTSUPP; - ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag); - if (ret) - return ret; - if (st->direct_mode) { *st_index = tag; return 0; @@ -152,8 +147,46 @@ int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, mutex_unlock(&st->lock); return ret; } +EXPORT_SYMBOL_GPL(mlx5_st_alloc_index_by_tag); + +int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *st_index) +{ + u16 tag; + int ret; + + ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag); + if (ret) + return ret; + + return mlx5_st_alloc_index_by_tag(dev, tag, st_index); +} EXPORT_SYMBOL_GPL(mlx5_st_alloc_index); +int mlx5_st_get_index(struct mlx5_core_dev *dev, u16 st_index) +{ + struct mlx5_st_idx_data *idx_data; + struct mlx5_st *st = dev->st; + int ret = 0; + + if (!st) + return -EOPNOTSUPP; + + if (st->direct_mode) + return 0; + + mutex_lock(&st->lock); + idx_data = xa_load(&st->idx_xa, st_index); + if (WARN_ON_ONCE(!idx_data)) + ret = -EINVAL; + else + refcount_inc(&idx_data->usecount); + mutex_unlock(&st->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_st_get_index); + int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index) { struct mlx5_st_idx_data *idx_data; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 
04b96c5abb57..0480b5c4f189 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1166,10 +1166,22 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type u64 length, u16 uid, phys_addr_t addr, u32 obj_id); #ifdef CONFIG_PCIE_TPH +int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, u16 tag, + u16 *st_index); +int mlx5_st_get_index(struct mlx5_core_dev *dev, u16 st_index); int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, unsigned int cpu_uid, u16 *st_index); int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index); #else +static inline int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, + u16 tag, u16 *st_index) +{ + return -EOPNOTSUPP; +} +static inline int mlx5_st_get_index(struct mlx5_core_dev *dev, u16 st_index) +{ + return -EOPNOTSUPP; +} static inline int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, unsigned int cpu_uid, u16 *st_index) diff --git a/include/rdma/frmr_pools.h b/include/rdma/frmr_pools.h index af1b88801fa4..a08d2b2cf9f3 100644 --- a/include/rdma/frmr_pools.h +++ b/include/rdma/frmr_pools.h @@ -24,7 +24,8 @@ struct ib_frmr_key { struct ib_frmr_pool_ops { int (*create_frmrs)(struct ib_device *device, struct ib_frmr_key *key, u32 *handles, u32 count); - void (*destroy_frmrs)(struct ib_device *device, u32 *handles, + void (*destroy_frmrs)(struct ib_device *device, + const struct ib_frmr_key *key, u32 *handles, u32 count); int (*build_key)(struct ib_device *device, const struct ib_frmr_key *in, struct ib_frmr_key *out); @@ -33,7 +34,7 @@ struct ib_frmr_pool_ops { int ib_frmr_pools_init(struct ib_device *device, const struct ib_frmr_pool_ops *pool_ops); void ib_frmr_pools_cleanup(struct ib_device *device); -int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr); +int ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr, bool *reused); int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr); 
#endif /* FRMR_POOLS_H */ -- 2.53.0-Meta
