Query dma-buf TPH metadata when registering a dma-buf MR for
peer-to-peer access and translate the returned steering tag into an
mlx5 ST index. To do so, split the tag-to-index half of
mlx5_st_alloc_index() out into a new exported helper,
mlx5_st_alloc_index_by_tag(). The DMAH path keeps priority; dma-buf
metadata is only consulted when no DMAH is supplied.

Track per-MR ownership of the allocated ST index and release it on MR
setup failure, destroy, and FRMR-pool reuse. Release the ST index before
the MR is pushed back into the FRMR pool, and free mlx5_st_idx_data when
its refcount reaches zero so repeated allocation/deallocation does not
leak memory.

Signed-off-by: Zhiping Zhang <[email protected]>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h          |  6 ++
 drivers/infiniband/hw/mlx5/mr.c               | 86 ++++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/lib/st.c  | 28 ++++--
 include/linux/mlx5/driver.h                   |  7 ++
 4 files changed, 115 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e156dc4d7529..4ab867392267 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -721,6 +721,12 @@ struct mlx5_ib_mr {
                        u8 revoked :1;
                        /* Indicates previous dmabuf page fault occurred */
                        u8 dmabuf_faulted:1;
+                       /* Set when the MR owns dmabuf_st_index and must
+                        * release it via mlx5_st_dealloc_index() once the
+                        * firmware mkey is no longer referencing it.
+                        */
+                       u8 dmabuf_st_owned:1;
+                       u16 dmabuf_st_index;
                        struct mlx5_ib_mkey null_mmkey;
                };
        };
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3b6da45061a5..8059b5e4da97 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -38,6 +38,7 @@
 #include <linux/delay.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-resv.h>
+#include <linux/pci-tph.h>
 #include <rdma/frmr_pools.h>
 #include <rdma/ib_umem_odp.h>
 #include "dm.h"
@@ -46,6 +47,8 @@
 #include "data_direct.h"
 #include "dmah.h"
 
+MODULE_IMPORT_NS("DMA_BUF");
+
 static int mkey_max_umr_order(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
@@ -899,6 +902,63 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
        .invalidate_mappings = mlx5_ib_dmabuf_invalidate_cb,
 };
 
+/*
+ * Query TPH metadata from @dmabuf and translate the raw steering tag into
+ * an mlx5 ST index. Returns nothing: on success the caller owns *@st_index
+ * and must release it with mlx5_st_dealloc_index() once the firmware mkey
+ * no longer references it. On failure *@ph is reset to MLX5_IB_NO_PH; on
+ * the early returns (no get_tph op, unhandled req type) nothing is written.
+ *
+ * @dmabuf must already be referenced by the caller (e.g. via the umem's
+ * attachment) so we don't re-resolve the user's fd here and avoid a
+ * dup2() TOCTOU between umem creation and TPH lookup.
+ */
+static void get_tph_mr_dmabuf(struct mlx5_ib_dev *dev, struct dma_buf *dmabuf,
+                             u16 *st_index, u8 *ph)
+{
+       u8 req_type;
+       u16 steering_tag;
+       u8 st_width;
+       int ret;
+
+       if (!dmabuf->ops->get_tph)
+               return;
+
+       req_type = pcie_tph_enabled_req_type(dev->mdev->pdev);
+       switch (req_type) {
+       case PCI_TPH_REQ_TPH_ONLY:
+               st_width = 8;
+               break;
+       case PCI_TPH_REQ_EXT_TPH:
+               st_width = 16;
+               break;
+       default:
+               return;
+       }
+
+       ret = dmabuf->ops->get_tph(dmabuf, &steering_tag, ph, st_width);
+       if (ret) {
+               mlx5_ib_dbg(dev, "get_tph failed (%d)\n", ret);
+               *ph = MLX5_IB_NO_PH;
+               return;
+       }
+
+       ret = mlx5_st_alloc_index_by_tag(dev->mdev, steering_tag, st_index);
+       if (ret) {
+               *ph = MLX5_IB_NO_PH;
+               mlx5_ib_dbg(dev, "st_alloc_index_by_tag failed (%d)\n", ret);
+       }
+}
+
+static void mlx5_ib_mr_put_dmabuf_st(struct mlx5_ib_mr *mr)
+{
+       if (mr->umem && mr->dmabuf_st_owned) { /* only when an index is owned */
+               mlx5_st_dealloc_index(mr_to_mdev(mr)->mdev,
+                                     mr->dmabuf_st_index);
+               mr->dmabuf_st_owned = 0; /* later calls become no-ops */
+       }
+}
+
 static struct ib_mr *
 reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
                   u64 offset, u64 length, u64 virt_addr,
@@ -941,16 +1001,26 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
                ph = dmah->ph;
                if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
                        st_index = mdmah->st_index;
+       } else {
+               get_tph_mr_dmabuf(dev, umem_dmabuf->attach->dmabuf,
+                                 &st_index, &ph);
        }
 
        mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
                                access_flags, access_mode,
                                st_index, ph);
        if (IS_ERR(mr)) {
+               if (!dmah && st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+                       mlx5_st_dealloc_index(dev->mdev, st_index);
                ib_umem_release(&umem_dmabuf->umem);
                return ERR_CAST(mr);
        }
 
+       if (!dmah && st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) {
+               mr->dmabuf_st_index = st_index;
+               mr->dmabuf_st_owned = 1;
+       }
+
        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
 
        atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
@@ -1377,9 +1447,17 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
        bool is_odp = is_odp_mr(mr);
        int ret;
 
-       if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr) &&
-           !ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr))
-               return 0;
+       if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr)) {
+               /*
+                * The mkey has been revoked: firmware no longer references
+                * dmabuf_st_index, so release it before this mr can re-enter
+                * the FRMR cache for reuse by another registration.
+                */
+               mlx5_ib_mr_put_dmabuf_st(mr);
+
+               if (!ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr))
+                       return 0;
+       }
 
        if (is_odp)
                mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
@@ -1400,6 +1478,8 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
                dma_resv_unlock(
                        to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
        }
+       if (!ret)
+               mlx5_ib_mr_put_dmabuf_st(mr);
        return ret;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c
index 997be91f0a13..8929c17c88bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c
@@ -29,7 +29,7 @@ struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev)
        u8 direct_mode = 0;
        u16 num_entries;
        u32 tbl_loc;
-       int ret;
+       int ret = 0;
 
        if (!MLX5_CAP_GEN(dev, mkey_pcie_tph))
                return NULL;
@@ -92,23 +92,18 @@ void mlx5_st_destroy(struct mlx5_core_dev *dev)
        kfree(st);
 }
 
-int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
-                       unsigned int cpu_uid, u16 *st_index)
+int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, u16 tag,
+                              u16 *st_index)
 {
        struct mlx5_st_idx_data *idx_data;
        struct mlx5_st *st = dev->st;
        unsigned long index;
        u32 xa_id;
-       u16 tag;
-       int ret;
+       int ret = 0;
 
        if (!st)
                return -EOPNOTSUPP;
 
-       ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag);
-       if (ret)
-               return ret;
-
        if (st->direct_mode) {
                *st_index = tag;
                return 0;
@@ -152,6 +147,20 @@ int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
        mutex_unlock(&st->lock);
        return ret;
 }
+EXPORT_SYMBOL_GPL(mlx5_st_alloc_index_by_tag);
+
+int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
+                       unsigned int cpu_uid, u16 *st_index)
+{
+       u16 tag; /* steering tag looked up for (mem_type, cpu_uid) */
+       int ret;
+
+       ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag);
+       if (ret)
+               return ret; /* lookup failed, no index was allocated */
+
+       return mlx5_st_alloc_index_by_tag(dev, tag, st_index);
+}
 EXPORT_SYMBOL_GPL(mlx5_st_alloc_index);
 
 int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index)
@@ -175,6 +184,7 @@ int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index)
 
        if (refcount_dec_and_test(&idx_data->usecount)) {
                xa_erase(&st->idx_xa, st_index);
+               kfree(idx_data);
                 /* We leave PCI config space as was before, no mkey will refer to it */
        }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04b96c5abb57..523a9ab0ae1e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1166,10 +1166,17 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type
                           u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
 
 #ifdef CONFIG_PCIE_TPH
+int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, u16 tag,
+                              u16 *st_index);
 int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type,
                        unsigned int cpu_uid, u16 *st_index);
 int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index);
 #else
+static inline int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev,
+                                            u16 tag, u16 *st_index)
+{
+       return -EOPNOTSUPP; /* stub used when CONFIG_PCIE_TPH is not set */
+}
 static inline int mlx5_st_alloc_index(struct mlx5_core_dev *dev,
                                      enum tph_mem_type mem_type,
                                      unsigned int cpu_uid, u16 *st_index)
-- 
2.53.0-Meta

Reply via email to