From: Eli Cohen <e...@mellanox.com>

Here is an implementation of an allocator that allocates blue flame
registers. A blue flame register is used for generating send doorbells.
A blue flame register can be used to generate either a regular doorbell
or a blue flame doorbell where the data to be sent is written to the
device's I/O memory hence saving the need to read the data from memory.
For blue flame doorbells to succeed, the blue flame register
needs to be mapped as write combining. The user can specify what kind of
send doorbells she wishes to use. If she requested write combining
mapping but that failed, the allocator will fall back to non write
combining mapping and will indicate that to the user.
Subsequent patches in this series will make use of this allocator.

Signed-off-by: Eli Cohen <e...@mellanox.com>
Reviewed-by: Matan Barak <mat...@mellanox.com>
Signed-off-by: Leon Romanovsky <l...@kernel.org>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 235 ++++++++++++++++++++++++++
 include/linux/mlx5/device.h                   |   2 +
 include/linux/mlx5/driver.h                   |  37 ++++
 include/linux/mlx5/mlx5_ifc.h                 |   7 +-
 4 files changed, 279 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c 
b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index ce7fceb..6a081a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -231,3 +231,238 @@ void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, 
struct mlx5_uar *uar)
        mlx5_cmd_free_uar(mdev, uar->index);
 }
 EXPORT_SYMBOL(mlx5_unmap_free_uar);
+
+/* Number of UARs contained in one system page: the device reported
+ * num_of_uars_per_page when the uar_4k capability is set, 1 otherwise.
+ */
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
+{
+       return MLX5_CAP_GEN(mdev, uar_4k) ?
+               MLX5_CAP_GEN(mdev, num_of_uars_per_page) : 1;
+}
+
+/* Translate a UAR index into the page frame number of the system page
+ * holding it, counted from the start of PCI resource 0 of the device.
+ */
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
+{
+       u32 sys_page = index;
+
+       /* With uar_4k set the index is scaled down by the difference
+        * between the system and the adapter page shifts.
+        */
+       if (MLX5_CAP_GEN(mdev, uar_4k))
+               sys_page >>= PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT;
+
+       return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + sys_page;
+}
+
+/* kref release callback for a UARs page.
+ *
+ * Unlinks the page from its list, tears down the I/O mapping, returns
+ * the UAR index to the device and frees the bookkeeping memory. It is
+ * reached through kref_put() in mlx5_free_bfreg(), which holds the list
+ * mutex around the call.
+ */
+static void up_rel_func(struct kref *kref)
+{
+       struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page,
+                                                ref_count);
+
+       list_del(&up->list);
+       /* Undo the ioremap()/ioremap_wc() done when the page was allocated;
+        * without this the mapping is leaked on every page release.
+        */
+       iounmap(up->map);
+       if (mlx5_cmd_free_uar(up->mdev, up->index))
+               mlx5_core_warn(up->mdev, "failed to free uar index %d\n",
+                              up->index);
+       kfree(up->reg_bitmap);
+       kfree(up->fp_bitmap);
+       kfree(up);
+}
+
+/* Allocate one system page of UARs from the device and map it.
+ *
+ * @mdev:   core device to allocate the UAR from
+ * @map_wc: map with ioremap_wc() when true, with ioremap() otherwise
+ *
+ * All blue flame registers of the page start out marked free, split
+ * between the regular and the fast path bitmaps.
+ *
+ * Returns the new page with its kref initialized, or an ERR_PTR():
+ * -ENOMEM for a failed memory allocation or ioremap(), -EAGAIN for a
+ * failed ioremap_wc(), or the error from mlx5_cmd_alloc_uar().
+ */
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+                                             bool map_wc)
+{
+       int nregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+       struct mlx5_uars_page *upage;
+       int err = -ENOMEM;
+       phys_addr_t pfn;
+       int bit;
+
+       upage = kzalloc(sizeof(*upage), GFP_KERNEL);
+       if (!upage)
+               return ERR_PTR(err);
+
+       upage->mdev = mdev;
+       upage->reg_bitmap = kcalloc(BITS_TO_LONGS(nregs),
+                                   sizeof(unsigned long), GFP_KERNEL);
+       if (!upage->reg_bitmap)
+               goto err_free;
+
+       upage->fp_bitmap = kcalloc(BITS_TO_LONGS(nregs),
+                                  sizeof(unsigned long), GFP_KERNEL);
+       if (!upage->fp_bitmap)
+               goto err_free;
+
+       /* Within each UAR the first MLX5_NON_FP_BFREGS_PER_UAR registers go
+        * to the regular bitmap, the remaining ones to the fast path bitmap.
+        */
+       for (bit = 0; bit < nregs; bit++) {
+               bool fast = (bit % MLX5_BFREGS_PER_UAR) >=
+                           MLX5_NON_FP_BFREGS_PER_UAR;
+
+               set_bit(bit, fast ? upage->fp_bitmap : upage->reg_bitmap);
+       }
+
+       upage->bfregs = nregs;
+       upage->fp_avail = nregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+       upage->reg_avail = nregs * MLX5_NON_FP_BFREGS_PER_UAR /
+                          MLX5_BFREGS_PER_UAR;
+
+       err = mlx5_cmd_alloc_uar(mdev, &upage->index);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
+               goto err_free;
+       }
+
+       pfn = uar2pfn(mdev, upage->index);
+       if (map_wc)
+               upage->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+       else
+               upage->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+       if (!upage->map) {
+               /* A failed write combining mapping is reported as -EAGAIN,
+                * a failed plain mapping as -ENOMEM.
+                */
+               err = map_wc ? -EAGAIN : -ENOMEM;
+               goto err_free_uar;
+       }
+
+       kref_init(&upage->ref_count);
+       mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+                     upage->index, upage->bfregs);
+       return upage;
+
+err_free_uar:
+       if (mlx5_cmd_free_uar(mdev, upage->index))
+               mlx5_core_warn(mdev, "failed to free uar index %d\n",
+                              upage->index);
+err_free:
+       /* kfree(NULL) is a no-op, so partially set up pages are fine here */
+       kfree(upage->fp_bitmap);
+       kfree(upage->reg_bitmap);
+       kfree(upage);
+       return ERR_PTR(err);
+}
+
+/* Byte offset, from the start of the mapped system page, of the blue
+ * flame area that belongs to blue flame register number @dbi of the page.
+ */
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
+{
+       int uar = dbi / MLX5_BFREGS_PER_UAR;    /* UAR within the page */
+       int reg = dbi % MLX5_BFREGS_PER_UAR;    /* register within the UAR */
+
+       return uar * MLX5_ADAPTER_PAGE_SIZE +
+              reg * (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) +
+              MLX5_BF_OFFSET;
+}
+
+/* Hand out one free blue flame register.
+ *
+ * @mdev:      core device
+ * @bfreg:     filled in with the mapping, owning page, wc flag and UAR index
+ * @map_wc:    take the register from the write combining list of pages
+ * @fast_path: take a fast path register instead of a regular one
+ *
+ * The register comes from the first page on the selected list; a new page
+ * is allocated when that list is empty. A page is taken off the list as
+ * soon as the requested kind of register runs out on it.
+ *
+ * Returns 0 on success or the error reported by alloc_uars_page().
+ */
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                      bool map_wc, bool fast_path)
+{
+       struct mlx5_bfreg_data *data = &mdev->priv.bfregs;
+       struct mlx5_bfreg_head *bh = map_wc ? &data->wc_head : &data->reg_head;
+       struct mlx5_uars_page *up;
+       unsigned long *bitmap;
+       unsigned int *avail;
+       int err = 0;
+       int dbi;
+
+       mutex_lock(&bh->lock);
+       if (!list_empty(&bh->list)) {
+               /* Each register handed out holds a reference on its page */
+               up = list_first_entry(&bh->list, struct mlx5_uars_page, list);
+               kref_get(&up->ref_count);
+       } else {
+               up = alloc_uars_page(mdev, map_wc);
+               if (IS_ERR(up)) {
+                       err = PTR_ERR(up);
+                       goto unlock;
+               }
+               list_add(&up->list, &bh->list);
+       }
+
+       bitmap = fast_path ? up->fp_bitmap : up->reg_bitmap;
+       avail = fast_path ? &up->fp_avail : &up->reg_avail;
+
+       dbi = find_first_bit(bitmap, up->bfregs);
+       clear_bit(dbi, bitmap);
+       *avail -= 1;
+       if (*avail == 0)
+               list_del(&up->list);
+
+       bfreg->map = up->map + map_offset(mdev, dbi);
+       bfreg->up = up;
+       bfreg->wc = map_wc;
+       bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+
+unlock:
+       mutex_unlock(&bh->lock);
+       return err;
+}
+
+/* Allocate a blue flame register for a send queue.
+ *
+ * @mdev:      core device
+ * @bfreg:     register descriptor to fill in
+ * @map_wc:    request a write combining mapping
+ * @fast_path: request a fast path register
+ *
+ * When a write combining mapping was requested and the attempt failed
+ * with -EAGAIN, the allocation is retried with a non write combining
+ * mapping; bfreg->wc tells the caller which kind was obtained.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                    bool map_wc, bool fast_path)
+{
+       int err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+
+       /* Success and every failure other than a wc -EAGAIN are final */
+       if (err != -EAGAIN || !map_wc)
+               return err;
+
+       return alloc_bfreg(mdev, bfreg, false, fast_path);
+}
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
+
+/* Recover the blue flame register number within its system page from the
+ * mapped address stored in @bfreg; the inverse of map_offset().
+ */
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+                                          struct mlx5_uars_page *up,
+                                          struct mlx5_sq_bfreg *bfreg)
+{
+       unsigned int reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
+       /* offset of the register inside its adapter page */
+       uintptr_t in_uar = (uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE;
+       /* which adapter page (UAR) of the system page the address is in */
+       unsigned int uar = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+
+       return uar * MLX5_BFREGS_PER_UAR +
+              (in_uar - MLX5_BF_OFFSET) / reg_size;
+}
+
+/* Return a blue flame register obtained from mlx5_alloc_bfreg().
+ *
+ * @mdev:  core device
+ * @bfreg: register to release
+ *
+ * Marks the register free again, puts its page back on the list when this
+ * is the first free register of its kind on that page, and drops the
+ * reference the register held on the page.
+ */
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
+{
+       struct mlx5_bfreg_data *data = &mdev->priv.bfregs;
+       struct mlx5_bfreg_head *bh = bfreg->wc ? &data->wc_head :
+                                                &data->reg_head;
+       struct mlx5_uars_page *up = bfreg->up;
+       unsigned int dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+       bool fast = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+       unsigned long *bitmap = fast ? up->fp_bitmap : up->reg_bitmap;
+       unsigned int *avail = fast ? &up->fp_avail : &up->reg_avail;
+
+       mutex_lock(&bh->lock);
+       *avail += 1;
+       set_bit(dbi, bitmap);
+       /* The page left the list when this kind of register ran out */
+       if (*avail == 1)
+               list_add_tail(&up->list, &bh->list);
+
+       kref_put(&up->ref_count, up_rel_func);
+       mutex_unlock(&bh->lock);
+}
+EXPORT_SYMBOL(mlx5_free_bfreg);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index aa851c5..db1b928 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -215,6 +215,8 @@ enum {
        MLX5_BFREGS_PER_UAR             = 4,
        MLX5_MAX_UARS                   = 1 << 8,
        MLX5_NON_FP_BFREGS_PER_UAR      = 2,
+       MLX5_FP_BFREGS_PER_UAR          = MLX5_BFREGS_PER_UAR -
+                                         MLX5_NON_FP_BFREGS_PER_UAR,
        MLX5_MAX_BFREGS                 = MLX5_MAX_UARS *
                                          MLX5_NON_FP_BFREGS_PER_UAR,
 };
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3d07e25..969aa1f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -452,6 +452,39 @@ struct mlx5_eq_table {
        spinlock_t              lock;
 };
 
+struct mlx5_uars_page { /* one mapped system page of UARs and its bf reg bookkeeping */
+       void __iomem           *map; /* ioremap()/ioremap_wc() mapping of the page */
+       bool                    wc; /* NOTE(review): not written by the allocator in this patch - confirm use */
+       u32                     index; /* UAR index returned by mlx5_cmd_alloc_uar() */
+       struct list_head        list; /* entry in the wc or regular list of pages */
+       unsigned int            bfregs; /* total number of bf regs in the page */
+       unsigned long          *reg_bitmap; /* set bits mark free non fast path bf regs */
+       unsigned long          *fp_bitmap; /* set bits mark free fast path bf regs */
+       unsigned int            reg_avail; /* number of free non fast path bf regs */
+       unsigned int            fp_avail; /* number of free fast path bf regs */
+       struct kref             ref_count; /* one reference per bf reg handed out */
+       struct mlx5_core_dev   *mdev; /* device the UAR was allocated from */
+};
+
+struct mlx5_bfreg_head { /* lock plus list of UARs pages of one mapping type */
+       /* protect blue flame registers allocations */
+       struct mutex            lock;
+       struct list_head        list; /* struct mlx5_uars_page entries, linked through their list member */
+};
+
+struct mlx5_bfreg_data { /* per device blue flame register allocator state */
+       struct mlx5_bfreg_head  reg_head; /* pages mapped with ioremap() */
+       struct mlx5_bfreg_head  wc_head; /* pages mapped with ioremap_wc() */
+};
+
+struct mlx5_sq_bfreg { /* one blue flame register as handed to a caller of mlx5_alloc_bfreg() */
+       void __iomem           *map; /* page mapping plus the register's blue flame offset */
+       struct mlx5_uars_page  *up; /* page the register was taken from */
+       bool                    wc; /* true when taken from the write combining list */
+       u32                     index; /* page UAR index plus the UAR number within the page */
+       unsigned int            offset; /* NOTE(review): not written by the allocator in this patch - confirm use */
+};
+
 struct mlx5_uar {
        u32                     index;
        struct list_head        bf_list;
@@ -645,6 +678,7 @@ struct mlx5_priv {
        void                   *pfault_ctx;
        struct srcu_struct      pfault_srcu;
 #endif
+       struct mlx5_bfreg_data          bfregs;
 };
 
 enum mlx5_device_state {
@@ -1022,6 +1056,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
 int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
 void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                    bool map_wc, bool fast_path);
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
 
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 15f8967..1223fef 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         uc[0x1];
        u8         rc[0x1];
 
-       u8         reserved_at_240[0xa];
+       u8         uar_4k[0x1];
+       u8         reserved_at_241[0x9];
        u8         uar_sz[0x6];
        u8         reserved_at_250[0x8];
        u8         log_pg_sz[0x8];
@@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         device_frequency_mhz[0x20];
        u8         device_frequency_khz[0x20];
 
-       u8         reserved_at_500[0x80];
+       u8         reserved_at_500[0x20];
+       u8         num_of_uars_per_page[0x20];
+       u8         reserved_at_540[0x40];
 
        u8         reserved_at_580[0x3f];
        u8         cqe_compression[0x1];
-- 
2.7.4

Reply via email to