From: Aharon Landau <[email protected]>

Currently, the cache structure is a linear array held within
mlx5_ib_dev. Each entry in the array holds a list_head of mkeys whose
size is two to the power of the entry's order (i.e. the order-2 entry
holds mkeys of size 4, the order-3 entry mkeys of size 8, and so on).
The access flags of all cached mkeys are
IB_ACCESS_DISABLE_RELAXED_ORDERING.
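
For reference, the order-to-index mapping that the linear array implies
(a sketch only; ent_for_order() is a hypothetical name for what the
removed mkey_cache_ent_from_order() below does):

  /* Entry i caches mkeys of order i + 2, so an mkey of order x lives
   * at index x - 2; orders mapping past MKEY_CACHE_LAST_STD_ENTRY are
   * not cacheable at all.
   */
  static struct mlx5_cache_ent *ent_for_order(struct mlx5_mkey_cache *cache,
                                              unsigned int order)
  {
          if (order < 2)
                  order = 2;
          if (order - 2 > MKEY_CACHE_LAST_STD_ENTRY)
                  return NULL;
          return &cache->ent[order - 2];
  }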

This structure does not allow adding new entries to the cache.
Therefore, the cache can only hold mkeys that meet the above conditions
(a size that is a power of 2 and access_flags ==
IB_ACCESS_DISABLE_RELAXED_ORDERING).
Later in the series, we would like to allow caching mkeys with
different sizes and different access flags. Adapt the cache structure
for this purpose.

Change the cache structure to an RB-tree, where every node is an entry
that holds a list of mkeys. The tree key is composed of the entry's
access flags (as the MSBs) and the mkey size (as the LSBs), and
mlx5_ib_dev holds the root of the tree.
When a device is initialized, the default entries are generated, that
is, entries for mkeys of size 2^x with access_flags ==
IB_ACCESS_DISABLE_RELAXED_ORDERING.
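
For illustration, the ordering the tree imposes can be sketched as the
following comparator (not part of the patch; it mirrors ent_insert()
and mkey_cache_ent_from_size() below):

  /* entry_flags is the primary (MSB) key and the xlt size the
   * secondary (LSB) key, so entries sharing the same access flags are
   * adjacent in the tree and sorted by size.
   */
  static int cache_ent_cmp(const struct mlx5_cache_ent *a,
                           const struct mlx5_cache_ent *b)
  {
          if (a->entry_flags != b->entry_flags)
                  return a->entry_flags < b->entry_flags ? -1 : 1;
          if (a->xlt != b->xlt)
                  return a->xlt < b->xlt ? -1 : 1;
          return 0;       /* duplicate key, ent_insert() returns -EEXIST */
  }

Because entries with the same access flags are contiguous, a lookup
that finds no exact size match can keep walking rb_next() to try
larger mkey pools with the same flags (see get_cache_mkey() below).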

Signed-off-by: Aharon Landau <[email protected]>
Reviewed-by: Shay Drory <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
---
 drivers/infiniband/hw/mlx5/main.c    |   4 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  34 +++-
 drivers/infiniband/hw/mlx5/mr.c      | 271 ++++++++++++++++++++-------
 drivers/infiniband/hw/mlx5/odp.c     |  43 +++--
 include/linux/mlx5/driver.h          |   4 +-
 5 files changed, 261 insertions(+), 95 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 849bf016d8ae..c46581686258 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4051,7 +4051,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
        int err;
 
-       err = mlx5_mkey_cache_cleanup(dev);
+       err = mlx5_mkey_cache_tree_cleanup(dev);
        if (err)
                mlx5_ib_warn(dev, "mr cache cleanup failed\n");
 
@@ -4154,7 +4154,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
        dev->umrc.pd = pd;
 
        sema_init(&dev->umrc.sem, MAX_UMR_WR);
-       ret = mlx5_mkey_cache_init(dev);
+       ret = mlx5_mkey_cache_tree_init(dev);
        if (ret) {
                mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
                goto error_4;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ffb6f1d41f3d..e22eeceae9eb 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -749,7 +749,7 @@ struct mlx5_cache_ent {
 
 
        char                    name[4];
-       u32                     order;
+       u32                     order;
        u32                     xlt;
        u32                     access_mode;
        u32                     page;
@@ -777,11 +777,22 @@ struct mlx5_cache_ent {
        struct mlx5_ib_dev     *dev;
        struct work_struct      work;
        struct delayed_work     dwork;
+
+       struct rb_node          node;
+       unsigned int            entry_flags;
+};
+
+enum {
+       MLX5_CACHE_ENTRY_FLAG_IMR_MTT = (1 << 0),
+       MLX5_CACHE_ENTRY_FLAG_IMR_KSM = (1 << 1),
+       MLX5_CACHE_ENTRY_FLAG_REMOTE_ATOMIC = (1 << 2),
+       MLX5_CACHE_ENTRY_FLAG_RELAXED_ORDERING = (1 << 3),
 };
 
-struct mlx5_mkey_cache {
+struct mlx5_mkey_cache_tree {
+       struct rb_root          cache_root;
+       struct mutex            cache_lock;
        struct workqueue_struct *wq;
-       struct mlx5_cache_ent   ent[MAX_MKEY_CACHE_ENTRIES];
        struct dentry           *root;
        unsigned long           last_add;
 };
@@ -1065,7 +1076,7 @@ struct mlx5_ib_dev {
        struct mlx5_ib_resources        devr;
 
        atomic_t                        mkey_var;
-       struct mlx5_mkey_cache          cache;
+       struct mlx5_mkey_cache_tree     cache;
        struct timer_list               delay_timer;
        /* Prevents soft lock on massive reg MRs */
        struct mutex                    slow_path_mutex;
@@ -1313,8 +1324,8 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
                          u64 access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
-int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
-int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_tree_init(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_tree_cleanup(struct mlx5_ib_dev *dev);
 
 struct mlx5_ib_mr *mlx5_alloc_special_mkey(struct mlx5_ib_dev *dev,
                                           unsigned int entry,
@@ -1335,6 +1346,9 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
                                struct ib_dm_mr_attr *attr,
                                struct uverbs_attr_bundle *attrs);
+struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
+                                               int entry_flags, int size,
+                                               int order);
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
@@ -1342,7 +1356,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
 void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
 int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent);
+int mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev, int ent_num);
 void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
                           struct mlx5_ib_mr *mr, int flags);
 
@@ -1361,7 +1375,11 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
 static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
 static inline void mlx5_ib_odp_cleanup(void)                               {}
-static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline int mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev,
+                                                int ent_num)
+{
+       return 0;
+}
 static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
                                         struct mlx5_ib_mr *mr, int flags) {}
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8d7de4eddc11..7c67aa4f1f1e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -432,20 +432,30 @@ static const struct file_operations limit_fops = {
        .read   = limit_read,
 };
 
-static bool someone_adding(struct mlx5_mkey_cache *cache)
+static bool is_special_ent(int ent_flags)
 {
-       unsigned int i;
+       return ent_flags &
+              (MLX5_CACHE_ENTRY_FLAG_IMR_MTT | MLX5_CACHE_ENTRY_FLAG_IMR_KSM);
+}
 
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-               struct mlx5_cache_ent *ent = &cache->ent[i];
-               bool ret;
+static bool someone_adding(struct mlx5_mkey_cache_tree *cache)
+{
+       struct mlx5_cache_ent *ent;
+       struct rb_node *node;
+       bool ret;
 
+       mutex_lock(&cache->cache_lock);
+       for (node = rb_first(&cache->cache_root); node; node = rb_next(node)) {
+               ent = container_of(node, struct mlx5_cache_ent, node);
                spin_lock_irq(&ent->lock);
                ret = ent->available_mkeys < ent->limit;
                spin_unlock_irq(&ent->lock);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&cache->cache_lock);
                        return true;
+               }
        }
+       mutex_unlock(&cache->cache_lock);
        return false;
 }
 
@@ -486,7 +496,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
 static void __cache_work_func(struct mlx5_cache_ent *ent)
 {
        struct mlx5_ib_dev *dev = ent->dev;
-       struct mlx5_mkey_cache *cache = &dev->cache;
+       struct mlx5_mkey_cache_tree *cache = &dev->cache;
        int err;
 
        spin_lock_irq(&ent->lock);
@@ -563,29 +573,142 @@ static void cache_work_func(struct work_struct *work)
        __cache_work_func(ent);
 }
 
+static int mlx5_ent_access_flags(struct mlx5_ib_dev *dev, int access_flags)
+{
+       int ret = 0;
+
+       if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+           MLX5_CAP_GEN(dev->mdev, atomic) &&
+           MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+               ret |= MLX5_CACHE_ENTRY_FLAG_REMOTE_ATOMIC;
+
+       if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+           MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+           !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+               ret |= MLX5_CACHE_ENTRY_FLAG_RELAXED_ORDERING;
+
+       if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+           MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+           !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+               ret |= MLX5_CACHE_ENTRY_FLAG_RELAXED_ORDERING;
+
+       return ret;
+}
+
+static int ent_insert(struct mlx5_mkey_cache_tree *cache,
+                     struct mlx5_cache_ent *ent)
+{
+       struct rb_node **new = &cache->cache_root.rb_node, *parent = NULL;
+       struct mlx5_cache_ent *this;
+
+       /* Figure out where to put new node */
+       while (*new) {
+               this = container_of(*new, struct mlx5_cache_ent, node);
+               parent = *new;
+               if (ent->entry_flags < this->entry_flags)
+                       new = &((*new)->rb_left);
+               else if (ent->entry_flags > this->entry_flags)
+                       new = &((*new)->rb_right);
+               else {
+                       if (ent->xlt < this->xlt)
+                               new = &((*new)->rb_left);
+                       else if (ent->xlt > this->xlt)
+                               new = &((*new)->rb_right);
+                       else
+                               return -EEXIST;
+               }
+       }
+
+       /* Add new node and rebalance tree. */
+       rb_link_node(&ent->node, parent, new);
+       rb_insert_color(&ent->node, &cache->cache_root);
+
+       return 0;
+}
+
+struct mlx5_cache_ent *mlx5_ib_create_cache_ent(struct mlx5_ib_dev *dev,
+                                               int entry_flags, int xlt_size,
+                                               int order)
+{
+       struct mlx5_cache_ent *ent;
+       int ret;
+
+       ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+       if (!ent)
+               return ERR_PTR(-ENOMEM);
+       INIT_LIST_HEAD(&ent->head);
+       spin_lock_init(&ent->lock);
+       ent->entry_flags = entry_flags;
+       ent->xlt = xlt_size;
+       ent->order = order;
+       ent->dev = dev;
+
+       INIT_WORK(&ent->work, cache_work_func);
+       INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+       mutex_lock(&dev->cache.cache_lock);
+       ret = ent_insert(&dev->cache, ent);
+       mutex_unlock(&dev->cache.cache_lock);
+       if (ret) {
+               kfree(ent);
+               return ERR_PTR(ret);
+       }
+       return ent;
+}
+
+static struct mlx5_cache_ent *mkey_cache_ent_from_size(struct mlx5_ib_dev *dev,
+                                                      int ent_flags, int size)
+{
+       struct rb_node *node = dev->cache.cache_root.rb_node;
+       struct mlx5_cache_ent *cur, *prev = NULL;
+
+       WARN_ON(!mutex_is_locked(&dev->cache.cache_lock));
+       while (node) {
+               cur = container_of(node, struct mlx5_cache_ent, node);
+
+               if (cur->entry_flags > ent_flags)
+                       node = node->rb_left;
+               else if (cur->entry_flags < ent_flags)
+                       node = node->rb_right;
+               else {
+                       if (cur->xlt > size) {
+                               prev = cur;
+                               node = node->rb_left;
+                       } else if (cur->xlt < size)
+                               node = node->rb_right;
+                       else
+                               return cur;
+               }
+       }
+       return prev;
+}
+
 /* Get an Mkey from a special cache entry */
 struct mlx5_ib_mr *mlx5_alloc_special_mkey(struct mlx5_ib_dev *dev,
                                           unsigned int entry, int access_flags)
 {
-       struct mlx5_mkey_cache *cache = &dev->cache;
        struct mlx5r_cache_mkey *cmkey;
        struct mlx5_cache_ent *ent;
        struct mlx5_ib_mr *mr;
+       int ent_flags;
        int err;
 
-       if (WARN_ON(entry <= MKEY_CACHE_LAST_STD_ENTRY ||
-                   entry >= ARRAY_SIZE(cache->ent)))
+       if (WARN_ON(!is_special_ent(entry)))
                return ERR_PTR(-EINVAL);
 
-       /* Matches access in alloc_cache_mr() */
-       if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
+       ent_flags = entry | mlx5_ent_access_flags(dev, access_flags);
+
+       mutex_lock(&dev->cache.cache_lock);
+       ent = mkey_cache_ent_from_size(dev, ent_flags, 0);
+       mutex_unlock(&dev->cache.cache_lock);
+       if (!ent)
                return ERR_PTR(-EOPNOTSUPP);
 
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       ent = &cache->ent[entry];
        spin_lock_irq(&ent->lock);
        if (list_empty(&ent->head)) {
                spin_unlock_irq(&ent->lock);
@@ -616,13 +739,18 @@ struct mlx5_ib_mr *mlx5_alloc_special_mkey(struct mlx5_ib_dev *dev,
 static struct mlx5r_cache_mkey *get_cache_mkey(struct mlx5_cache_ent *req_ent)
 {
        struct mlx5_ib_dev *dev = req_ent->dev;
-       struct mlx5_cache_ent *ent = req_ent;
        struct mlx5r_cache_mkey *cmkey;
+       struct mlx5_cache_ent *ent;
+       struct rb_node *node;
 
        /* Try larger Mkey pools from the cache to satisfy the allocation */
-       for (; ent != &dev->cache.ent[MKEY_CACHE_LAST_STD_ENTRY + 1]; ent++) {
-               mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
-                           ent - dev->cache.ent);
+       mutex_lock(&dev->cache.cache_lock);
+       for (node = &req_ent->node; node; node = rb_next(node)) {
+               ent = container_of(node, struct mlx5_cache_ent, node);
+
+               if (ent->entry_flags != req_ent->entry_flags)
+                       break;
+               mlx5_ib_dbg(dev, "size %d\n", ent->xlt);
 
                spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
@@ -632,11 +760,13 @@ static struct mlx5r_cache_mkey *get_cache_mkey(struct mlx5_cache_ent *req_ent)
                        ent->available_mkeys--;
                        queue_adjust_cache_locked(ent);
                        spin_unlock_irq(&ent->lock);
+                       mutex_unlock(&dev->cache.cache_lock);
                        return cmkey;
                }
                queue_adjust_cache_locked(ent);
                spin_unlock_irq(&ent->lock);
        }
+       mutex_unlock(&dev->cache.cache_lock);
        req_ent->miss++;
        return NULL;
 }
@@ -662,10 +792,8 @@ static int mlx5_free_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
        return 0;
 }
 
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
 {
-       struct mlx5_mkey_cache *cache = &dev->cache;
-       struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5r_cache_mkey *tmp_mkey, *mkey;
        LIST_HEAD(del_list);
 
@@ -691,7 +819,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
        }
 }
 
-static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_tree_debugfs_cleanup(struct mlx5_ib_dev *dev)
 {
        if (!mlx5_debugfs_root || dev->is_rep)
                return;
@@ -700,20 +828,25 @@ static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
        dev->cache.root = NULL;
 }
 
-static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_tree_debugfs_init(struct mlx5_ib_dev *dev)
 {
-       struct mlx5_mkey_cache *cache = &dev->cache;
+       struct mlx5_mkey_cache_tree *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
+       struct rb_node *node;
        struct dentry *dir;
-       int i;
 
        if (!mlx5_debugfs_root || dev->is_rep)
                return;
 
        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
 
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-               ent = &cache->ent[i];
+       mutex_lock(&dev->cache.cache_lock);
+       for (node = rb_first(&cache->cache_root); node; node = rb_next(node)) {
+               ent = container_of(node, struct mlx5_cache_ent, node);
+
+               if (!ent->order)
+                       continue;
+
                sprintf(ent->name, "%d", ent->order);
                dir = debugfs_create_dir(ent->name, cache->root);
                debugfs_create_file("size", 0600, dir, ent, &size_fops);
@@ -721,6 +854,7 @@ static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
                debugfs_create_u32("cur", 0400, dir, &ent->available_mkeys);
                debugfs_create_u32("miss", 0600, dir, &ent->miss);
        }
+       mutex_unlock(&dev->cache.cache_lock);
 }
 
 static void delay_time_func(struct timer_list *t)
@@ -730,13 +864,16 @@ static void delay_time_func(struct timer_list *t)
        WRITE_ONCE(dev->fill_delay, 0);
 }
 
-int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_tree_init(struct mlx5_ib_dev *dev)
 {
-       struct mlx5_mkey_cache *cache = &dev->cache;
+       struct mlx5_mkey_cache_tree *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
+       int err;
        int i;
 
        mutex_init(&dev->slow_path_mutex);
+       mutex_init(&cache->cache_lock);
+       cache->cache_root = RB_ROOT;
        cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -745,28 +882,25 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
 
        mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
        timer_setup(&dev->delay_timer, delay_time_func, 0);
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-               ent = &cache->ent[i];
-               INIT_LIST_HEAD(&ent->head);
-               spin_lock_init(&ent->lock);
-               ent->order = i + 2;
-               ent->dev = dev;
-               ent->limit = 0;
-
-               INIT_WORK(&ent->work, cache_work_func);
-               INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+       for (i = 0; i < MAX_MKEY_CACHE_DEFAULT_ENTRIES; i++) {
+               u8 order = i + 2;
+               u32 xlt_size = (1 << order) * sizeof(struct mlx5_mtt) /
+                              MLX5_IB_UMR_OCTOWORD;
 
                if (i > MKEY_CACHE_LAST_STD_ENTRY) {
-                       mlx5_odp_init_mkey_cache_entry(ent);
+                       err = mlx5_odp_init_mkey_cache_entry(dev, i);
+                       if (err)
+                               return err;
                        continue;
                }
 
-               if (ent->order > mkey_cache_max_order(dev))
+               ent = mlx5_ib_create_cache_ent(dev, 0, xlt_size, order);
+               if (IS_ERR(ent))
+                       return PTR_ERR(ent);
+               if (order > mkey_cache_max_order(dev))
                        continue;
 
                ent->page = PAGE_SHIFT;
-               ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
-                          MLX5_IB_UMR_OCTOWORD;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
                    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
@@ -778,22 +912,22 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
                queue_adjust_cache_locked(ent);
                spin_unlock_irq(&ent->lock);
        }
-
-       mlx5_mkey_cache_debugfs_init(dev);
-
+       mlx5_mkey_cache_tree_debugfs_init(dev);
        return 0;
 }
 
-int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_tree_cleanup(struct mlx5_ib_dev *dev)
 {
-       unsigned int i;
+       struct rb_root *root = &dev->cache.cache_root;
+       struct mlx5_cache_ent *ent, *tmp;
+       struct rb_node *node;
 
        if (!dev->cache.wq)
                return 0;
 
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) {
-               struct mlx5_cache_ent *ent = &dev->cache.ent[i];
-
+       mutex_lock(&dev->cache.cache_lock);
+       for (node = rb_first(root); node; node = rb_next(node)) {
+               ent = container_of(node, struct mlx5_cache_ent, node);
                spin_lock_irq(&ent->lock);
                ent->disabled = true;
                spin_unlock_irq(&ent->lock);
@@ -801,11 +935,15 @@ int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
                cancel_delayed_work_sync(&ent->dwork);
        }
 
-       mlx5_mkey_cache_debugfs_cleanup(dev);
+       mlx5_mkey_cache_tree_debugfs_cleanup(dev);
        mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
 
-       for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++)
-               clean_keys(dev, i);
+       rbtree_postorder_for_each_entry_safe(ent, tmp, root, node) {
+               clean_keys(dev, ent);
+               rb_erase(&ent->node, root);
+               kfree(ent);
+       }
+       mutex_unlock(&dev->cache.cache_lock);
 
        destroy_workqueue(dev->cache.wq);
        del_timer_sync(&dev->delay_timer);
@@ -921,19 +1059,6 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
        return err;
 }
 
-static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev,
-                                                       unsigned int order)
-{
-       struct mlx5_mkey_cache *cache = &dev->cache;
-
-       if (order < cache->ent[0].order)
-               return &cache->ent[0];
-       order = order - cache->ent[0].order;
-       if (order > MKEY_CACHE_LAST_STD_ENTRY)
-               return NULL;
-       return &cache->ent[order];
-}
-
 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
                          u64 length, int access_flags)
 {
@@ -964,6 +1089,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
        struct mlx5_cache_ent *ent;
        struct mlx5_ib_mr *mr;
        unsigned int page_size;
+       int ent_flags;
+       int xlt_size;
        int ret;
 
        if (umem->is_dmabuf)
@@ -973,14 +1100,16 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
                                                     0, iova);
        if (WARN_ON(!page_size))
                return ERR_PTR(-EINVAL);
-       ent = mkey_cache_ent_from_order(
-               dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
+       ent_flags = mlx5_ent_access_flags(dev, access_flags);
+       xlt_size = get_octo_len(iova, umem->length, order_base_2(page_size));
+       mutex_lock(&dev->cache.cache_lock);
+       ent = mkey_cache_ent_from_size(dev, ent_flags, xlt_size);
+       mutex_unlock(&dev->cache.cache_lock);
        /*
         * Matches access in alloc_cache_mr(). If the MR can't come from the
         * cache then synchronously create an uncached one.
         */
-       if (!ent || ent->limit == 0 ||
-           !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
+       if (!ent || ent->limit == 0) {
                mutex_lock(&dev->slow_path_mutex);
                mr = reg_create(pd, umem, iova, access_flags, page_size, false);
                mutex_unlock(&dev->slow_path_mutex);
@@ -1774,7 +1903,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
        if (WARN_ON(!*page_size))
                return false;
        return (1ULL << mr->mmkey.cache_ent->order) >=
-              ib_umem_num_dma_blocks(new_umem, *page_size);
+                              ib_umem_num_dma_blocks(new_umem, *page_size);
 }
 
 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 9c7942118d2c..e4a78b4c6034 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -418,7 +418,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
        if (IS_ERR(odp))
                return ERR_CAST(odp);
 
-       mr = mlx5_alloc_special_mkey(mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY,
+       mr = mlx5_alloc_special_mkey(mr_to_mdev(imr),
+                                    MLX5_CACHE_ENTRY_FLAG_IMR_MTT,
                                     imr->access_flags);
        if (IS_ERR(mr)) {
                ib_umem_odp_release(odp);
@@ -493,7 +494,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
        if (IS_ERR(umem_odp))
                return ERR_CAST(umem_odp);
 
-       imr = mlx5_alloc_special_mkey(dev, MLX5_IMR_KSM_CACHE_ENTRY,
+       imr = mlx5_alloc_special_mkey(dev, MLX5_CACHE_ENTRY_FLAG_IMR_KSM,
                                      access_flags);
        if (IS_ERR(imr)) {
                ib_umem_odp_release(umem_odp);
@@ -1605,30 +1606,48 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
        return err;
 }
 
-void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent)
+int mlx5_odp_init_mkey_cache_entry(struct mlx5_ib_dev *dev, int ent_num)
 {
-       if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
-               return;
+       struct mlx5_cache_ent *ent;
+       int ent_flags;
+       u32 xlt_size;
+
+       if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+               return 0;
 
-       switch (ent->order - 2) {
+       switch (ent_num) {
        case MLX5_IMR_MTT_CACHE_ENTRY:
-               ent->page = PAGE_SHIFT;
-               ent->xlt = MLX5_IMR_MTT_ENTRIES *
-                          sizeof(struct mlx5_mtt) /
+               xlt_size = MLX5_IMR_MTT_ENTRIES * sizeof(struct mlx5_mtt) /
                           MLX5_IB_UMR_OCTOWORD;
+               ent_flags = MLX5_CACHE_ENTRY_FLAG_IMR_MTT;
+
+               ent = mlx5_ib_create_cache_ent(dev, ent_flags, xlt_size,
+                                              ent_num + 2);
+               if (IS_ERR(ent))
+                       return PTR_ERR(ent);
+
+               ent->page = PAGE_SHIFT;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
                ent->limit = 0;
                break;
 
        case MLX5_IMR_KSM_CACHE_ENTRY:
-               ent->page = MLX5_KSM_PAGE_SHIFT;
-               ent->xlt = mlx5_imr_ksm_entries *
-                          sizeof(struct mlx5_klm) /
+               xlt_size = mlx5_imr_ksm_entries * sizeof(struct mlx5_klm) /
                           MLX5_IB_UMR_OCTOWORD;
+               ent_flags = MLX5_CACHE_ENTRY_FLAG_IMR_KSM;
+
+               ent = mlx5_ib_create_cache_ent(dev, ent_flags, xlt_size,
+                                              ent_num + 2);
+               if (IS_ERR(ent))
+                       return PTR_ERR(ent);
+
+               ent->page = MLX5_KSM_PAGE_SHIFT;
                ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
                ent->limit = 0;
                break;
        }
+
+       return 0;
 }
 
 static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8191140454e1..bb459a2ca18c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1080,7 +1080,7 @@ enum {
        MKEY_CACHE_LAST_STD_ENTRY = 20,
        MLX5_IMR_MTT_CACHE_ENTRY,
        MLX5_IMR_KSM_CACHE_ENTRY,
-       MAX_MKEY_CACHE_ENTRIES
+       MAX_MKEY_CACHE_DEFAULT_ENTRIES
 };
 
 /* Async-atomic event notifier used by mlx5 core to forward FW
@@ -1142,7 +1142,7 @@ struct mlx5_profile {
        struct {
                int     size;
                int     limit;
-       } mr_cache[MAX_MKEY_CACHE_ENTRIES];
+       } mr_cache[MAX_MKEY_CACHE_DEFAULT_ENTRIES];
 };
 
 enum {
-- 
2.31.1
