Add provider-level caching of pkeys to mthca

* have the driver intercept SMPs that are pkey table notifications,
  and update its internal cache with the new values.
* modify query_pkey to use this cache instead of doing a blocking HW
  call
* while building the MLX header for a send on an MLX QP, use this cache


Signed-off-by: Yosef Etigin <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/mthca/mthca_dev.h      |   12 +
 drivers/infiniband/hw/mthca/mthca_mad.c      |    5 
 drivers/infiniband/hw/mthca/mthca_provider.c |  167 +++++++++++++++++++++++----
 drivers/infiniband/hw/mthca/mthca_qp.c       |    5 
 include/rdma/ib_smi.h                        |    1 
 5 files changed, 163 insertions(+), 27 deletions(-)

Index: b/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_dev.h   2007-05-02 17:47:52.931912600 
+0300
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h   2007-05-02 17:48:31.525038376 
+0300
@@ -49,6 +49,8 @@
 
 #include <asm/semaphore.h>
 
+#include <rdma/ib_smi.h>
+
 #include "mthca_provider.h"
 #include "mthca_doorbell.h"
 
@@ -287,6 +289,11 @@ struct mthca_catas_err {
        struct list_head        list;
 };
 
+struct mthca_pkey_cache {       /* per-port P_Key table cache, allocated with table_len trailing entries */
+       int             table_len;      /* number of entries in table[] */
+       u16             table[0];       /* P_Key values, stored in host byte order */
+};
+
 extern struct mutex mthca_device_mutex;
 
 struct mthca_dev {
@@ -360,6 +367,9 @@ struct mthca_dev {
        struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
        spinlock_t            sm_lock;
        u8                    rate[MTHCA_MAX_PORTS];
+
+       rwlock_t               pkey_cache_lock;
+       struct mthca_pkey_cache *pkey_cache[MTHCA_MAX_PORTS];
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
@@ -585,6 +595,8 @@ int mthca_process_mad(struct ib_device *
 int mthca_create_agents(struct mthca_dev *dev);
 void mthca_free_agents(struct mthca_dev *dev);
 
+int mthca_cache_update(struct mthca_dev *mdev, struct ib_smp *smp, u8 
port_num);
+
 static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
 {
        return container_of(ibdev, struct mthca_dev, ib_dev);
Index: b/drivers/infiniband/hw/mthca/mthca_mad.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_mad.c   2007-05-02 17:47:53.067888380 
+0300
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c   2007-05-02 17:48:31.525038376 
+0300
@@ -134,6 +134,11 @@ static void smp_snoop(struct ib_device *
                }
 
                if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+
+                       /* update pkey cache from a snooped MAD */
+                       mthca_dbg(to_mdev(ibdev), "pkey change at port %d\n", 
port_num);
+                       mthca_cache_update(to_mdev(ibdev), (struct ib_smp*) 
mad, port_num);
+
                        event.device           = ibdev;
                        event.event            = IB_EVENT_PKEY_CHANGE;
                        event.element.port_num = port_num;
Index: b/drivers/infiniband/hw/mthca/mthca_provider.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_provider.c      2007-05-02 
17:47:52.996901024 +0300
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c      2007-05-02 
17:48:31.526038198 +0300
@@ -243,36 +243,32 @@ out:
 static int mthca_query_pkey(struct ib_device *ibdev,
                             u8 port, u16 index, u16 *pkey)
 {
-       struct ib_smp *in_mad  = NULL;
-       struct ib_smp *out_mad = NULL;
-       int err = -ENOMEM;
-       u8 status;
+       struct mthca_dev *mdev = to_mdev(ibdev);
+       struct mthca_pkey_cache *cache;
+       unsigned long flags;    /* the irqsave lock variants require unsigned long */
+       int err = -EINVAL;
 
-       in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-       out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-       if (!in_mad || !out_mad)
-               goto out;
+       /* Validate the port before it is used to index pkey_cache[]. */
+       if (port < 1 || port > mdev->ib_dev.phys_port_cnt) {
+               mthca_warn(mdev, "pkey request for invalid port %d\n", port);
+               return -EINVAL;
+       }
 
-       init_query_mad(in_mad);
-       in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
-       in_mad->attr_mod = cpu_to_be32(index / 32);
+       read_lock_irqsave(&mdev->pkey_cache_lock, flags);
 
-       err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
-                           port, NULL, NULL, in_mad, out_mad,
-                           &status);
-       if (err)
-               goto out;
-       if (status) {
-               err = -EINVAL;
-               goto out;
+       /* Serve the request from the cache; a missing cache has no entries. */
+       cache = mdev->pkey_cache[port - 1];
+       if (cache && index < cache->table_len) {
+               *pkey = cache->table[index];
+               err = 0;
        }
 
-       *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
+       read_unlock_irqrestore(&mdev->pkey_cache_lock, flags);
 
- out:
-       kfree(in_mad);
-       kfree(out_mad);
+       if (err)
+               mthca_warn(mdev, "pkey request at %d[%d] is out of range\n",
+                          port, index);
        return err;
 }
 
 static int mthca_query_gid(struct ib_device *ibdev, u8 port,
@@ -1259,6 +1250,163 @@ out:
        return err;
 }
 
+/*
+ * Initialize the per-port P_Key caches.
+ *
+ * For every physical port: query the P_Key table length, allocate a
+ * zeroed cache of that size and fill it block by block with P_Key table
+ * queries issued through mthca_MAD_IFC().
+ *
+ * Returns 0 on success or a negative errno.  On failure some ports may
+ * already have a cache allocated; the caller releases them with
+ * mthca_cache_destroy().
+ */
+static int mthca_cache_init(struct mthca_dev *mdev)
+{
+       struct ib_smp *in_mad;
+       struct ib_smp *out_mad;
+       struct ib_port_attr *tprops;
+       struct mthca_pkey_cache *cache;
+       unsigned int tbl_len;
+       unsigned int nblocks;
+       unsigned int blk;
+       unsigned int i;
+       int err = -ENOMEM;
+       u8 status;
+
+       rwlock_init(&mdev->pkey_cache_lock);
+
+       mthca_dbg(mdev, "setting up PKey cache\n");
+
+       memset(mdev->pkey_cache, 0, sizeof mdev->pkey_cache);
+
+       tprops  = kmalloc(sizeof *tprops, GFP_KERNEL);
+       in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
+       out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+       if (!tprops || !in_mad || !out_mad)
+               goto out;
+
+       /* A device without ports has nothing to query. */
+       err = 0;
+
+       for (i = 0; i < mdev->ib_dev.phys_port_cnt; ++i) {
+               /* find out how many pkeys this port holds */
+               err = mthca_query_port(&mdev->ib_dev, i + 1, tprops);
+               if (err)
+                       goto out;
+
+               /* zeroed so a partially filled table never holds heap garbage */
+               tbl_len = tprops->pkey_tbl_len;
+               cache = kzalloc(sizeof *cache +
+                               tbl_len * sizeof cache->table[0], GFP_KERNEL);
+               if (!cache) {
+                       /* err is 0 after the successful port query above */
+                       err = -ENOMEM;
+                       goto out;
+               }
+               cache->table_len = tbl_len;
+               mdev->pkey_cache[i] = cache;
+
+               /*
+                * Walk the table in ascending blocks: counting an unsigned
+                * length down by IB_SMP_NUM_PKEY_ENTRIES wraps around when
+                * the length is not a multiple of the block size.
+                */
+               nblocks = (tbl_len + IB_SMP_NUM_PKEY_ENTRIES - 1) /
+                         IB_SMP_NUM_PKEY_ENTRIES;
+               for (blk = 0; blk < nblocks; ++blk) {
+                       /* send pkey query mad */
+                       memset(in_mad, 0, sizeof *in_mad);
+                       init_query_mad(in_mad);
+                       in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
+                       in_mad->attr_mod = cpu_to_be32(blk);
+
+                       err = mthca_MAD_IFC(mdev, 1, 1,
+                                           i + 1, NULL, NULL, in_mad, out_mad,
+                                           &status);
+                       if (!err && status)
+                               err = -EINVAL;
+                       if (err)
+                               goto out;
+
+                       err = mthca_cache_update(mdev, out_mad, i + 1);
+                       if (err)
+                               goto out;
+               }
+       }
+
+out:
+       kfree(in_mad);
+       kfree(out_mad);
+       kfree(tprops);
+       return err;
+}
+
+/*
+ * Destroy the pkey cache
+ */
+static void mthca_cache_destroy(struct mthca_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->ib_dev.phys_port_cnt; ++i) {
+               kfree(mdev->pkey_cache[i]);
+               /* leave no dangling pointer behind; a second destroy is a no-op */
+               mdev->pkey_cache[i] = NULL;
+       }
+}
+
+/*
+ * We snooped a pkey-table mad
+ * extract the new pkey table, and update our internal cache
+ *
+ * Returns 0 on success, -EINVAL if the port or the table block named
+ * by smp->attr_mod has no cache entries.  A trailing block shorter than
+ * IB_SMP_NUM_PKEY_ENTRIES is copied up to the end of the table.
+ */
+int mthca_cache_update(struct mthca_dev *mdev, struct ib_smp *smp, u8 port_num)
+{
+       struct mthca_pkey_cache *pkey_cache;
+       const __be16 *src = (const __be16 *) smp->data;
+       unsigned int table_offset;
+       unsigned int count;
+       unsigned int i;
+       unsigned long flags;
+       int err = -EINVAL;
+
+       if (port_num < 1 || port_num > mdev->ib_dev.phys_port_cnt) {
+               mthca_warn(mdev, "pkey table for invalid port %d - ignoring\n",
+                          port_num);
+               return -EINVAL;
+       }
+
+       table_offset = (be32_to_cpu(smp->attr_mod) & 0xFFFF) *
+                      IB_SMP_NUM_PKEY_ENTRIES;
+
+       mthca_dbg(mdev, "port %d: new pkey table at offset %d\n",
+                 port_num, table_offset);
+
+       write_lock_irqsave(&mdev->pkey_cache_lock, flags);
+
+       pkey_cache = mdev->pkey_cache[port_num - 1];
+       if (pkey_cache && pkey_cache->table_len > 0 &&
+           table_offset < (unsigned int) pkey_cache->table_len) {
+               /* copy only the entries that fit between offset and table end */
+               count = pkey_cache->table_len - table_offset;
+               if (count > IB_SMP_NUM_PKEY_ENTRIES)
+                       count = IB_SMP_NUM_PKEY_ENTRIES;
+               for (i = 0; i < count; ++i)
+                       pkey_cache->table[table_offset + i] = be16_to_cpu(src[i]);
+               err = 0;
+       }
+
+       write_unlock_irqrestore(&mdev->pkey_cache_lock, flags);
+
+       if (err)
+               mthca_warn(mdev, "pkey table out of range - ignoring\n");
+       return err;
+}
+
 int mthca_register_device(struct mthca_dev *dev)
 {
        int ret;
@@ -1365,6 +1477,12 @@ int mthca_register_device(struct mthca_d
 
        mutex_init(&dev->cap_mask_mutex);
 
+       ret = mthca_cache_init(dev);
+       if (ret) {
+               mthca_cache_destroy(dev);
+               return ret;
+       }
+
        ret = ib_register_device(&dev->ib_dev);
        if (ret)
                return ret;
@@ -1387,4 +1505,5 @@ void mthca_unregister_device(struct mthc
 {
        mthca_stop_catas_poll(dev);
        ib_unregister_device(&dev->ib_dev);
+       mthca_cache_destroy(dev);
 }
Index: b/include/rdma/ib_smi.h
===================================================================
--- a/include/rdma/ib_smi.h     2007-05-02 17:47:12.741071381 +0300
+++ b/include/rdma/ib_smi.h     2007-05-02 17:48:31.527038020 +0300
@@ -43,6 +43,7 @@
 
 #define IB_SMP_DATA_SIZE                       64
 #define IB_SMP_MAX_PATH_HOPS                   64
+#define IB_SMP_NUM_PKEY_ENTRIES                32
 
 struct ib_smp {
        u8      base_version;
Index: b/drivers/infiniband/hw/mthca/mthca_qp.c
===================================================================
--- a/drivers/infiniband/hw/mthca/mthca_qp.c    2007-05-02 17:48:30.752176039 
+0300
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c    2007-05-02 17:48:31.528037842 
+0300
@@ -41,7 +41,6 @@
 #include <asm/io.h>
 
 #include <rdma/ib_pack.h>
-#include <rdma/ib_verbs.h>
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
@@ -1484,9 +1483,9 @@ static int build_mlx_header(struct mthca
                sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        sqp->ud_header.bth.solicited_event = !!(wr->send_flags & 
IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
-               ib_query_pkey(&dev->ib_dev, sqp->qp.port, sqp->pkey_index, 
&pkey);
+               dev->ib_dev.query_pkey(&dev->ib_dev, sqp->qp.port, 
sqp->pkey_index, &pkey);
        else
-               ib_query_pkey(&dev->ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, 
&pkey);
+               dev->ib_dev.query_pkey(&dev->ib_dev, sqp->qp.port, 
wr->wr.ud.pkey_index, &pkey);
 
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to