Add Mellanox HCA-specific userspace verbs support to mthca.

Signed-off-by: Roland Dreier <[EMAIL PROTECTED]>

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cq.c    2005-04-04 
14:57:12.228756073 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-04 
14:58:12.364679525 -0700
@@ -743,6 +743,7 @@
 }
 
 int mthca_init_cq(struct mthca_dev *dev, int nent,
+                 struct mthca_ucontext *ctx, u32 pdn,
                  struct mthca_cq *cq)
 {
        int size = nent * MTHCA_CQ_ENTRY_SIZE;
@@ -754,30 +755,33 @@
 
        might_sleep();
 
-       cq->ibcq.cqe = nent - 1;
+       cq->ibcq.cqe  = nent - 1;
+       cq->is_kernel = !ctx;
 
        cq->cqn = mthca_alloc(&dev->cq_table.alloc);
        if (cq->cqn == -1)
                return -ENOMEM;
 
        if (mthca_is_memfree(dev)) {
-               cq->arm_sn = 1;
-
                err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
                if (err)
                        goto err_out;
 
-               err = -ENOMEM;
+               if (cq->is_kernel) {
+                       cq->arm_sn = 1;
+
+                       err = -ENOMEM;
 
-               cq->set_ci_db_index = mthca_alloc_db(dev, 
MTHCA_DB_TYPE_CQ_SET_CI,
-                                                    cq->cqn, &cq->set_ci_db);
-               if (cq->set_ci_db_index < 0)
-                       goto err_out_icm;
-
-               cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
-                                                 cq->cqn, &cq->arm_db);
-               if (cq->arm_db_index < 0)
-                       goto err_out_ci;
+                       cq->set_ci_db_index = mthca_alloc_db(dev, 
MTHCA_DB_TYPE_CQ_SET_CI,
+                                                            cq->cqn, 
&cq->set_ci_db);
+                       if (cq->set_ci_db_index < 0)
+                               goto err_out_icm;
+
+                       cq->arm_db_index = mthca_alloc_db(dev, 
MTHCA_DB_TYPE_CQ_ARM,
+                                                         cq->cqn, &cq->arm_db);
+                       if (cq->arm_db_index < 0)
+                               goto err_out_ci;
+               }
        }
 
        mailbox = kmalloc(sizeof (struct mthca_cq_context) + 
MTHCA_CMD_MAILBOX_EXTRA,
@@ -787,12 +791,14 @@
 
        cq_context = MAILBOX_ALIGN(mailbox);
 
-       err = mthca_alloc_cq_buf(dev, size, cq);
-       if (err)
-               goto err_out_mailbox;
+       if (cq->is_kernel) {
+               err = mthca_alloc_cq_buf(dev, size, cq);
+               if (err)
+                       goto err_out_mailbox;
 
-       for (i = 0; i < nent; ++i)
-               set_cqe_hw(get_cqe(cq, i));
+               for (i = 0; i < nent; ++i)
+                       set_cqe_hw(get_cqe(cq, i));
+       }
 
        spin_lock_init(&cq->lock);
        atomic_set(&cq->refcount, 1);
@@ -803,11 +809,14 @@
                                                  MTHCA_CQ_STATE_DISARMED |
                                                  MTHCA_CQ_FLAG_TR);
        cq_context->start           = cpu_to_be64(0);
-       cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
-                                                 dev->driver_uar.index);
+       cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+       if (ctx)
+               cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
+       else
+               cq_context->logsize_usrpage |= 
cpu_to_be32(dev->driver_uar.index);
        cq_context->error_eqn       = 
cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
        cq_context->comp_eqn        = 
cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
-       cq_context->pd              = cpu_to_be32(dev->driver_pd.pd_num);
+       cq_context->pd              = cpu_to_be32(pdn);
        cq_context->lkey            = cpu_to_be32(cq->mr.ibmr.lkey);
        cq_context->cqn             = cpu_to_be32(cq->cqn);
 
@@ -845,17 +854,19 @@
        return 0;
 
 err_out_free_mr:
-       mthca_free_mr(dev, &cq->mr);
-       mthca_free_cq_buf(dev, cq);
+       if (cq->is_kernel) {
+               mthca_free_mr(dev, &cq->mr);
+               mthca_free_cq_buf(dev, cq);
+       }
 
 err_out_mailbox:
        kfree(mailbox);
 
-       if (mthca_is_memfree(dev))
+       if (cq->is_kernel && mthca_is_memfree(dev))
                mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
 
 err_out_ci:
-       if (mthca_is_memfree(dev))
+       if (cq->is_kernel)
                mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, 
cq->set_ci_db_index);
 
 err_out_icm:
@@ -895,7 +906,8 @@
                int j;
 
                printk(KERN_ERR "context for CQN %x (cons index %x, next sw 
%d)\n",
-                      cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
+                      cq->cqn, cq->cons_index,
+                      cq->is_kernel ? !!next_cqe_sw(cq) : 0);
                for (j = 0; j < 16; ++j)
                        printk(KERN_ERR "[%2x] %08x\n", j * 4, 
be32_to_cpu(ctx[j]));
        }
@@ -913,15 +925,17 @@
        atomic_dec(&cq->refcount);
        wait_event(cq->wait, !atomic_read(&cq->refcount));
 
-       mthca_free_mr(dev, &cq->mr);
-       mthca_free_cq_buf(dev, cq);
-
-       if (mthca_is_memfree(dev)) {
-               mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    cq->arm_db_index);
-               mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, 
cq->set_ci_db_index);
-               mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+       if (cq->is_kernel) {
+               mthca_free_mr(dev, &cq->mr);
+               mthca_free_cq_buf(dev, cq);
+               if (mthca_is_memfree(dev)) {
+                       mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM,    
cq->arm_db_index);
+                       mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, 
cq->set_ci_db_index);
+               }
        }
 
+       if (mthca_is_memfree(dev))
+               mthca_table_put(dev, dev->cq_table.table, cq->cqn);
        mthca_free(&dev->cq_table.alloc, cq->cqn);
        kfree(mailbox);
 }
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h   2005-04-04 
14:57:12.254750421 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h        2005-04-04 
14:58:12.411669307 -0700
@@ -49,14 +49,6 @@
 #define DRV_VERSION    "0.06-pre"
 #define DRV_RELDATE    "November 8, 2004"
 
-/* XXX remove once SINAI defines make it into kernel.org */
-#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD
-#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
-#endif
-#ifndef PCI_DEVICE_ID_MELLANOX_SINAI
-#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274
-#endif
-
 enum {
        MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
        MTHCA_FLAG_SRQ        = 1 << 2,
@@ -413,6 +405,7 @@
 int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
 int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
 int mthca_init_cq(struct mthca_dev *dev, int nent,
+                 struct mthca_ucontext *ctx, u32 pdn,
                  struct mthca_cq *cq);
 void mthca_free_cq(struct mthca_dev *dev,
                   struct mthca_cq *cq);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c       
2005-04-04 14:57:12.256749986 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c    2005-04-04 
14:58:12.412669090 -0700
@@ -45,6 +45,15 @@
        MTHCA_TABLE_CHUNK_SIZE = 1 << 18
 };
 
+struct mthca_user_db_table {
+       struct semaphore mutex;
+       struct {
+               u64                uvirt;
+               struct scatterlist mem;
+               int                refcount;
+       }                page[0];
+};
+
 void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
 {
        struct mthca_icm_chunk *chunk, *tmp;
@@ -334,13 +343,132 @@
        kfree(table);
 }
 
-static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int 
page)
 {
        return dev->uar_table.uarc_base +
-               dev->driver_uar.index * dev->uar_table.uarc_size +
+               uar->index * dev->uar_table.uarc_size +
                page * 4096;
 }
 
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+                     struct mthca_user_db_table *db_tab, int index, u64 uaddr)
+{
+       int ret = 0;
+       u8 status;
+       int i;
+
+       if (!mthca_is_memfree(dev))
+               return 0;
+
+       if (index < 0 || index > dev->uar_table.uarc_size / 8)
+               return -EINVAL;
+
+       down(&db_tab->mutex);
+
+       i = index / MTHCA_DB_REC_PER_PAGE;
+
+       if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE)       ||
+           (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
+           (uaddr & 4095)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (db_tab->page[i].refcount) {
+               ++db_tab->page[i].refcount;
+               goto out;
+       }
+
+       ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
+                            &db_tab->page[i].mem.page, NULL);
+       if (ret < 0)
+               goto out;
+
+       db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
+
+       ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+       if (ret < 0) {
+               put_page(db_tab->page[i].mem.page);
+               goto out;
+       }
+
+       ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
+                                mthca_uarc_virt(dev, uar, i), &status);
+       if (!ret && status)
+               ret = -EINVAL;
+       if (ret) {
+               pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, 
PCI_DMA_TODEVICE);
+               put_page(db_tab->page[i].mem.page);
+               goto out;
+       }
+
+       db_tab->page[i].uvirt    = uaddr;
+       db_tab->page[i].refcount = 1;
+
+out:
+       up(&db_tab->mutex);
+       return ret;
+}
+
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+                        struct mthca_user_db_table *db_tab, int index)
+{
+       if (!mthca_is_memfree(dev))
+               return;
+
+       /*
+        * To make our bookkeeping simpler, we don't unmap DB
+        * pages until we clean up the whole db table.
+        */
+
+       down(&db_tab->mutex);
+
+       --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+
+       up(&db_tab->mutex);
+}
+
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
+{
+       struct mthca_user_db_table *db_tab;
+       int npages;
+       int i;
+
+       if (!mthca_is_memfree(dev))
+               return NULL;
+
+       npages = dev->uar_table.uarc_size / 4096;
+       db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, 
GFP_KERNEL);
+       if (!db_tab)
+               return ERR_PTR(-ENOMEM);
+
+       init_MUTEX(&db_tab->mutex);
+       for (i = 0; i < npages; ++i) {
+               db_tab->page[i].refcount = 0;
+               db_tab->page[i].uvirt    = 0;
+       }
+
+       return db_tab;
+}
+
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+                              struct mthca_user_db_table *db_tab)
+{
+       int i;
+       u8 status;
+
+       if (!mthca_is_memfree(dev))
+               return;
+
+       for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
+               if (db_tab->page[i].uvirt) {
+                       mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, 
&status);
+                       pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, 
PCI_DMA_TODEVICE);
+                       put_page(db_tab->page[i].mem.page);
+               }
+       }
+}
+
 int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
 {
        int group;
@@ -397,7 +525,8 @@
        }
        memset(page->db_rec, 0, 4096);
 
-       ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), 
&status);
+       ret = mthca_MAP_ICM_page(dev, page->mapping,
+                                mthca_uarc_virt(dev, &dev->driver_uar, i), 
&status);
        if (!ret && status)
                ret = -EINVAL;
        if (ret) {
@@ -451,7 +580,7 @@
 
        if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
            i >= dev->db_tab->max_group1 - 1) {
-               mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+               mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 
1, &status);
                
                dma_free_coherent(&dev->pdev->dev, 4096,
                                  page->db_rec, page->mapping);
@@ -520,7 +649,7 @@
                if (!bitmap_empty(dev->db_tab->page[i].used, 
MTHCA_DB_REC_PER_PAGE))
                        mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
 
-               mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+               mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 
1, &status);
                
                dma_free_coherent(&dev->pdev->dev, 4096,
                                  dev->db_tab->page[i].db_rec,
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.h       
2005-04-04 14:57:12.256749986 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.h    2005-04-04 
14:58:12.413668872 -0700
@@ -148,7 +148,7 @@
        struct semaphore      mutex;
 };
 
-enum {
+enum mthca_db_type {
        MTHCA_DB_TYPE_INVALID   = 0x0,
        MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
        MTHCA_DB_TYPE_CQ_ARM    = 0x2,
@@ -158,6 +158,17 @@
        MTHCA_DB_TYPE_GROUP_SEP = 0x7
 };
 
+struct mthca_user_db_table;
+struct mthca_uar;
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+                     struct mthca_user_db_table *db_tab, int index, u64 uaddr);
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+                        struct mthca_user_db_table *db_tab, int index);
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+                              struct mthca_user_db_table *db_tab);
+
 int mthca_init_db_tab(struct mthca_dev *dev);
 void mthca_cleanup_db_tab(struct mthca_dev *dev);
 int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c      
2005-04-04 14:57:12.286743464 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c   2005-04-04 
14:58:12.444662133 -0700
@@ -29,13 +29,17 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: mthca_provider.c 2100 2005-03-31 20:43:01Z roland $
+ * $Id: mthca_provider.c 2109 2005-04-04 21:10:34Z roland $
  */
 
+#include <asm/uaccess.h>
+
 #include <ib_smi.h>
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
+#include "mthca_user.h"
+#include "mthca_memfree.h"
 
 static int mthca_query_device(struct ib_device *ibdev,
                              struct ib_device_attr *props)
@@ -283,11 +287,78 @@
        return err;
 }
 
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
+                                               const void __user *udata, int 
udatalen)
+{
+       struct mthca_alloc_ucontext      ucmd;
+       struct mthca_alloc_ucontext_resp uresp;
+       struct mthca_ucontext           *context;
+       int                              err;
+
+       if (copy_from_user(&ucmd, udata, sizeof ucmd))
+               return ERR_PTR(-EFAULT);
+
+       uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
+       if (mthca_is_memfree(to_mdev(ibdev)))
+               uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
+       else
+               uresp.uarc_size = 0;
+
+       context = kmalloc(sizeof *context, GFP_KERNEL);
+       if (!context)
+               return ERR_PTR(-ENOMEM);
+
+       err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
+       if (err) {
+               kfree(context);
+               return ERR_PTR(err);
+       }
+
+       context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
+       if (IS_ERR(context->db_tab)) {
+               err = PTR_ERR(context->db_tab);
+               mthca_uar_free(to_mdev(ibdev), &context->uar);
+               kfree(context);
+               return ERR_PTR(err);
+       }
+
+       if (copy_to_user((void __user *) (unsigned long) ucmd.respbuf,
+                        &uresp, sizeof uresp)) {
+               mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, 
context->db_tab);
+               mthca_uar_free(to_mdev(ibdev), &context->uar);
+               kfree(context);
+               return ERR_PTR(-EFAULT);
+       }
+
+       return &context->ibucontext;
+}
+
+static int mthca_dealloc_ucontext(struct ib_ucontext *context)
 {
+       mthca_cleanup_user_db_tab(to_mdev(context->device), 
&to_mucontext(context)->uar,
+                                 to_mucontext(context)->db_tab);
+       mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
+       kfree(to_mucontext(context));
+
+       return 0;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
+                                   struct ib_ucontext *context,
+                                   const void __user *udata, int udatalen)
+{
+       struct mthca_alloc_pd ucmd;
        struct mthca_pd *pd;
        int err;
 
+       if (context) {
+               if (udatalen != sizeof ucmd)
+                       return ERR_PTR(-EINVAL);
+
+               if (copy_from_user(&ucmd, udata, sizeof ucmd))
+                       return ERR_PTR(-EFAULT);
+       }
+
        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);
@@ -298,6 +369,14 @@
                return ERR_PTR(err);
        }
 
+       if (context) {
+               if (put_user(pd->pd_num, (u32 __user *) (unsigned long) 
ucmd.pdnbuf)) {
+                       mthca_pd_free(to_mdev(ibdev), pd);
+                       kfree(pd);
+                       return ERR_PTR(-EFAULT);
+               }
+       }
+
        return &pd->ibpd;
 }
 
@@ -337,8 +416,10 @@
 }
 
 static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
-                                    struct ib_qp_init_attr *init_attr)
+                                    struct ib_qp_init_attr *init_attr,
+                                    const void __user *udata, int udatalen)
 {
+       struct mthca_create_qp ucmd;
        struct mthca_qp *qp;
        int err;
 
@@ -347,10 +428,48 @@
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
+               struct mthca_ucontext *context;
+
                qp = kmalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);
 
+               if (pd->uobject) {
+                       context = to_mucontext(pd->uobject->context);
+
+                       if (udatalen != sizeof ucmd)
+                               return ERR_PTR(-EINVAL);
+
+                       if (copy_from_user(&ucmd, udata, sizeof ucmd))
+                               return ERR_PTR(-EFAULT);
+
+                       err = mthca_map_user_db(to_mdev(pd->device), 
&context->uar,
+                                               context->db_tab,
+                                               ucmd.sq_db_index, 
ucmd.sq_db_page);
+                       if (err) {
+                               kfree(qp);
+                               return ERR_PTR(err);
+                       }
+
+                       err = mthca_map_user_db(to_mdev(pd->device), 
&context->uar,
+                                               context->db_tab,
+                                               ucmd.rq_db_index, 
ucmd.rq_db_page);
+                       if (err) {
+                               mthca_unmap_user_db(to_mdev(pd->device),
+                                                   &context->uar,
+                                                   context->db_tab,
+                                                   ucmd.sq_db_index);
+                               kfree(qp);
+                               return ERR_PTR(err);
+                       }
+               }
+
+               if (pd->uobject) {
+                       qp->mr.ibmr.lkey = ucmd.lkey;
+                       qp->sq.db_index  = ucmd.sq_db_index;
+                       qp->rq.db_index  = ucmd.rq_db_index;
+               }
+
                qp->sq.max    = init_attr->cap.max_send_wr;
                qp->rq.max    = init_attr->cap.max_recv_wr;
                qp->sq.max_gs = init_attr->cap.max_send_sge;
@@ -361,12 +480,30 @@
                                     to_mcq(init_attr->recv_cq),
                                     init_attr->qp_type, init_attr->sq_sig_type,
                                     qp);
+
+               if (err && pd->uobject) {
+                       context = to_mucontext(pd->uobject->context);
+
+                       mthca_unmap_user_db(to_mdev(pd->device),
+                                           &context->uar,
+                                           context->db_tab,
+                                           ucmd.sq_db_index);
+                       mthca_unmap_user_db(to_mdev(pd->device),
+                                           &context->uar,
+                                           context->db_tab,
+                                           ucmd.rq_db_index);
+               }
+
                qp->ibqp.qp_num = qp->qpn;
                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
+               /* Don't allow userspace to create special QPs */
+               if (pd->uobject)
+                       return ERR_PTR(-EINVAL);
+
                qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);
@@ -396,42 +533,116 @@
                return ERR_PTR(err);
        }
 
-        init_attr->cap.max_inline_data = 0;
+       init_attr->cap.max_inline_data = 0;
+       init_attr->cap.max_send_wr     = qp->sq.max;
+       init_attr->cap.max_recv_wr     = qp->rq.max;
 
        return &qp->ibqp;
 }
 
 static int mthca_destroy_qp(struct ib_qp *qp)
 {
+       if (qp->uobject) {
+               mthca_unmap_user_db(to_mdev(qp->device),
+                                   &to_mucontext(qp->uobject->context)->uar,
+                                   to_mucontext(qp->uobject->context)->db_tab,
+                                   to_mqp(qp)->sq.db_index);
+               mthca_unmap_user_db(to_mdev(qp->device),
+                                   &to_mucontext(qp->uobject->context)->uar,
+                                   to_mucontext(qp->uobject->context)->db_tab,
+                                   to_mqp(qp)->rq.db_index);
+       }
        mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
        kfree(qp);
        return 0;
 }
 
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+                                    struct ib_ucontext *context,
+                                    const void __user *udata, int udatalen)
 {
+       struct mthca_create_cq ucmd;
        struct mthca_cq *cq;
        int nent;
        int err;
 
+       if (context) {
+               if (udatalen != sizeof ucmd)
+                       return ERR_PTR(-EINVAL);
+
+               if (copy_from_user(&ucmd, udata, sizeof ucmd))
+                       return ERR_PTR(-EFAULT);
+
+               err = mthca_map_user_db(to_mdev(ibdev), 
&to_mucontext(context)->uar,
+                                       to_mucontext(context)->db_tab,
+                                       ucmd.set_db_index, ucmd.set_db_page);
+               if (err)
+                       return ERR_PTR(err);
+
+               err = mthca_map_user_db(to_mdev(ibdev), 
&to_mucontext(context)->uar,
+                                       to_mucontext(context)->db_tab,
+                                       ucmd.arm_db_index, ucmd.arm_db_page);
+               if (err)
+                       goto err_unmap_set;
+       }
+
        cq = kmalloc(sizeof *cq, GFP_KERNEL);
-       if (!cq)
-               return ERR_PTR(-ENOMEM);
+       if (!cq) {
+               err = -ENOMEM;
+               goto err_unmap_arm;
+       }
+
+       if (context) {
+               cq->mr.ibmr.lkey    = ucmd.lkey;
+               cq->set_ci_db_index = ucmd.set_db_index;
+               cq->arm_db_index    = ucmd.arm_db_index;
+       }
 
        for (nent = 1; nent <= entries; nent <<= 1)
                ; /* nothing */
 
-       err = mthca_init_cq(to_mdev(ibdev), nent, cq);
-       if (err) {
-               kfree(cq);
-               cq = ERR_PTR(err);
+       err = mthca_init_cq(to_mdev(ibdev), nent, 
+                           context ? to_mucontext(context) : NULL,
+                           context ? ucmd.pdn : 
to_mdev(ibdev)->driver_pd.pd_num,
+                           cq);
+       if (err)
+               goto err_free;
+
+       if (context && put_user(cq->cqn, (u32 __user *) (unsigned long) 
ucmd.cqnbuf)) {
+               mthca_free_cq(to_mdev(ibdev), cq);
+               goto err_free;
        }
 
        return &cq->ibcq;
+
+err_free:
+       kfree(cq);
+
+err_unmap_arm:
+       if (context)
+               mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+                                   to_mucontext(context)->db_tab, 
ucmd.arm_db_index);
+
+err_unmap_set:
+       if (context)
+               mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+                                   to_mucontext(context)->db_tab, 
ucmd.set_db_index);
+
+       return ERR_PTR(err);
 }
 
 static int mthca_destroy_cq(struct ib_cq *cq)
 {
+       if (cq->uobject) {
+               mthca_unmap_user_db(to_mdev(cq->device),
+                                   &to_mucontext(cq->uobject->context)->uar,
+                                   to_mucontext(cq->uobject->context)->db_tab,
+                                   to_mcq(cq)->arm_db_index);
+               mthca_unmap_user_db(to_mdev(cq->device),
+                                   &to_mucontext(cq->uobject->context)->uar,
+                                   to_mucontext(cq->uobject->context)->db_tab,
+                                   to_mcq(cq)->set_ci_db_index);
+       }
        mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
        kfree(cq);
 
@@ -558,6 +769,57 @@
                                  convert_access(acc), mr);
 
        if (err) {
+               kfree(page_list);
+               kfree(mr);
+               return ERR_PTR(err);
+       }
+
+       kfree(page_list);
+       return &mr->ibmr;
+}
+
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem 
*region,
+                                      int acc, const void __user *udata, int 
udatalen)
+{
+       struct ib_umem_chunk *chunk;
+       int npages = 0;
+       u64 *page_list;
+       struct mthca_mr *mr;
+       int shift;
+       int i, j, k;
+       int err;
+
+       shift = ffs(region->page_size) - 1;
+
+       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(-ENOMEM);
+       
+       list_for_each_entry(chunk, &region->chunk_list, list)
+               npages += chunk->nents;
+
+       page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
+       if (!page_list) {
+               kfree(mr);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       i = 0;
+
+       list_for_each_entry(chunk, &region->chunk_list, list)
+               for (j = 0; j < chunk->nmap; ++j)
+                       for (k = 0; k < sg_dma_len(&chunk->page_list[j]) >> 
shift; ++k)
+                               page_list[i++] = 
sg_dma_address(&chunk->page_list[j]) +
+                                       region->page_size * k;
+
+       err = mthca_mr_alloc_phys(to_mdev(pd->device),
+                                 to_mpd(pd)->pd_num,
+                                 page_list, shift, npages,
+                                 region->virt_base, region->length,
+                                 convert_access(acc), mr);
+
+       if (err) {
+               kfree(page_list);
                kfree(mr);
                return ERR_PTR(err);
        }
@@ -574,6 +836,22 @@
        return 0;
 }
 
+static int mthca_mmap_uar(struct ib_ucontext *context,
+                         struct vm_area_struct *vma)
+{
+       if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+               return -EINVAL;
+
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       if (remap_pfn_range(vma, vma->vm_start,
+                           to_mucontext(context)->uar.pfn,
+                           PAGE_SIZE, vma->vm_page_prot))
+               return -EAGAIN;
+
+       return 0;
+}
+
 static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                                      struct ib_fmr_attr *fmr_attr)
 {
@@ -690,6 +968,8 @@
        int i;
 
        strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
+       dev->ib_dev.owner                = THIS_MODULE;
+
        dev->ib_dev.node_type            = IB_NODE_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
@@ -699,6 +979,8 @@
        dev->ib_dev.modify_port          = mthca_modify_port;
        dev->ib_dev.query_pkey           = mthca_query_pkey;
        dev->ib_dev.query_gid            = mthca_query_gid;
+       dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
+       dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
        dev->ib_dev.alloc_pd             = mthca_alloc_pd;
        dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
        dev->ib_dev.create_ah            = mthca_ah_create;
@@ -711,6 +993,7 @@
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
        dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
+       dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;
 
        if (dev->mthca_flags & MTHCA_FLAG_FMR) {
@@ -726,6 +1009,7 @@
        dev->ib_dev.attach_mcast         = mthca_multicast_attach;
        dev->ib_dev.detach_mcast         = mthca_multicast_detach;
        dev->ib_dev.process_mad          = mthca_process_mad;
+       dev->ib_dev.mmap                 = mthca_mmap_uar;
 
        if (mthca_is_memfree(dev)) {
                dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.h      
2005-04-04 14:57:12.287743246 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.h   2005-04-04 
14:58:12.445661916 -0700
@@ -54,6 +54,14 @@
        int           index;
 };
 
+struct mthca_user_db_table;
+
+struct mthca_ucontext {
+       struct ib_ucontext          ibucontext;
+       struct mthca_uar            uar;
+       struct mthca_user_db_table *db_tab;
+};
+
 struct mthca_mr {
        struct ib_mr ibmr;
        int order;
@@ -167,6 +175,7 @@
        int                    cqn;
        u32                    cons_index;
        int                    is_direct;
+       int                    is_kernel;
 
        /* Next fields are Arbel only */
        int                    set_ci_db_index;
@@ -236,6 +245,11 @@
        dma_addr_t      header_dma;
 };
 
+static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext 
*ibucontext)
+{
+       return container_of(ibucontext, struct mthca_ucontext, ibucontext);
+}
+
 static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
 {
        return container_of(ibmr, struct mthca_fmr, ibmr);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c    2005-04-04 
14:57:12.320736072 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-04 
14:58:12.491651915 -0700
@@ -652,7 +652,11 @@
 
        /* leave arbel_sched_queue as 0 */
 
-       qp_context->usr_page   = cpu_to_be32(dev->driver_uar.index);
+       if (qp->ibqp.uobject)
+               qp_context->usr_page =
+                       
cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+       else
+               qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
        qp_context->local_qpn  = cpu_to_be32(qp->qpn);
        if (attr_mask & IB_QP_DEST_QPN) {
                qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -917,6 +921,15 @@
 
        qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
                                    1 << qp->sq.wqe_shift);
+
+       /*
+        * If this is a userspace QP, we don't actually have to
+        * allocate anything.  All we need is to calculate the WQE
+        * sizes and the send_wqe_offset, so we're done now.
+        */
+       if (pd->ibpd.uobject)
+               return 0;
+
        size = PAGE_ALIGN(qp->send_wqe_offset +
                          (qp->sq.max << qp->sq.wqe_shift));
 
@@ -1015,10 +1028,33 @@
        return err;
 }
 
-static int mthca_alloc_memfree(struct mthca_dev *dev,
+static void mthca_free_wqe_buf(struct mthca_dev *dev,
                               struct mthca_qp *qp)
 {
-       int ret = 0;
+       int i;
+       int size = PAGE_ALIGN(qp->send_wqe_offset +
+                             (qp->sq.max << qp->sq.wqe_shift));
+
+       if (qp->is_direct) {
+               pci_free_consistent(dev->pdev, size,
+                                   qp->queue.direct.buf,
+                                   pci_unmap_addr(&qp->queue.direct, mapping));
+       } else {
+               for (i = 0; i < size / PAGE_SIZE; ++i) {
+                       pci_free_consistent(dev->pdev, PAGE_SIZE,
+                                           qp->queue.page_list[i].buf,
+                                           
pci_unmap_addr(&qp->queue.page_list[i],
+                                                          mapping));
+               }
+       }
+
+       kfree(qp->wrid);
+}
+
+static int mthca_map_memfree(struct mthca_dev *dev,
+                            struct mthca_qp *qp)
+{
+       int ret;
 
        if (mthca_is_memfree(dev)) {
                ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1029,35 +1065,15 @@
                if (ret)
                        goto err_qpc;
 
-               ret = mthca_table_get(dev, dev->qp_table.rdb_table,
-                                     qp->qpn << dev->qp_table.rdb_shift);
-               if (ret)
-                       goto err_eqpc;
-
-               qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
-                                                qp->qpn, &qp->rq.db);
-               if (qp->rq.db_index < 0) {
-                       ret = -ENOMEM;
-                       goto err_rdb;
-               }
+               ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+                                     qp->qpn << dev->qp_table.rdb_shift);
+               if (ret)
+                       goto err_eqpc;
 
-               qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
-                                                qp->qpn, &qp->sq.db);
-               if (qp->sq.db_index < 0) {
-                       ret = -ENOMEM;
-                       goto err_rq_db;
-               }
        }
 
        return 0;
 
-err_rq_db:
-       mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-
-err_rdb:
-       mthca_table_put(dev, dev->qp_table.rdb_table,
-                       qp->qpn << dev->qp_table.rdb_shift);
-
 err_eqpc:
        mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
 
@@ -1067,16 +1083,43 @@
        return ret;
 }
 
+static void mthca_unmap_memfree(struct mthca_dev *dev,
+                               struct mthca_qp *qp)
+{
+       if (mthca_is_memfree(dev)) {
+               mthca_table_put(dev, dev->qp_table.rdb_table,
+                               qp->qpn << dev->qp_table.rdb_shift);
+               mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+               mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+       }
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
+                              struct mthca_qp *qp)
+{
+       int ret = 0;
+
+       if (mthca_is_memfree(dev)) {
+               qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+                                                qp->qpn, &qp->rq.db);
+               if (qp->rq.db_index < 0)
+                       return ret;
+
+               qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+                                                qp->qpn, &qp->sq.db);
+               if (qp->sq.db_index < 0)
+                       mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+       }
+
+       return ret;
+}
+
 static void mthca_free_memfree(struct mthca_dev *dev,
                               struct mthca_qp *qp)
 {
        if (mthca_is_memfree(dev)) {
                mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
                mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-               mthca_table_put(dev, dev->qp_table.rdb_table,
-                               qp->qpn << dev->qp_table.rdb_shift);
-               mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
-               mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
        }
 }
 
@@ -1108,13 +1151,28 @@
        mthca_wq_init(&qp->sq);
        mthca_wq_init(&qp->rq);
 
-       ret = mthca_alloc_memfree(dev, qp);
+       ret = mthca_map_memfree(dev, qp);
        if (ret)
                return ret;
 
        ret = mthca_alloc_wqe_buf(dev, pd, qp);
        if (ret) {
-               mthca_free_memfree(dev, qp);
+               mthca_unmap_memfree(dev, qp);
+               return ret;
+       }
+
+       /*
+        * If this is a userspace QP, we're done now.  The doorbells
+        * will be allocated and buffers will be initialized in
+        * userspace.
+        */
+       if (pd->ibpd.uobject)
+               return 0;
+
+       ret = mthca_alloc_memfree(dev, qp);
+       if (ret) {
+               mthca_free_wqe_buf(dev, qp);
+               mthca_unmap_memfree(dev, qp);
                return ret;
        }
 
@@ -1274,8 +1332,6 @@
                   struct mthca_qp *qp)
 {
        u8 status;
-       int size;
-       int i;
        struct mthca_cq *send_cq;
        struct mthca_cq *recv_cq;
 
@@ -1305,31 +1361,22 @@
        if (qp->state != IB_QPS_RESET)
                mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, 
&status);
 
-       mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
-       if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
-               mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
-
-       mthca_free_mr(dev, &qp->mr);
-
-       size = PAGE_ALIGN(qp->send_wqe_offset +
-                         (qp->sq.max << qp->sq.wqe_shift));
+       /*
+        * If this is a userspace QP, the buffers, MR, CQs and so on
+        * will be cleaned up in userspace, so all we have to do is
+        * unref the mem-free tables and free the QPN in our table.
+        */
+       if (!qp->ibqp.uobject) {
+               mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
+               if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+                       mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, 
qp->qpn);
 
-       if (qp->is_direct) {
-               pci_free_consistent(dev->pdev, size,
-                                   qp->queue.direct.buf,
-                                   pci_unmap_addr(&qp->queue.direct, mapping));
-       } else {
-               for (i = 0; i < size / PAGE_SIZE; ++i) {
-                       pci_free_consistent(dev->pdev, PAGE_SIZE,
-                                           qp->queue.page_list[i].buf,
-                                           
pci_unmap_addr(&qp->queue.page_list[i],
-                                                          mapping));
-               }
+               mthca_free_mr(dev, &qp->mr);
+               mthca_free_memfree(dev, qp);
+               mthca_free_wqe_buf(dev, qp);
        }
 
-       kfree(qp->wrid);
-
-       mthca_free_memfree(dev, qp);
+       mthca_unmap_memfree(dev, qp);
 
        if (is_sqp(dev, qp)) {
                atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ linux-export/drivers/infiniband/hw/mthca/mthca_user.h       2005-04-04 
14:58:12.491651915 -0700
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MTHCA_USER_H
+#define MTHCA_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mthca_alloc_ucontext {
+       __u64 respbuf;
+};
+
+struct mthca_alloc_ucontext_resp {
+       __u32 qp_tab_size;
+       __u32 uarc_size;
+};
+
+struct mthca_alloc_pd {
+       __u64 pdnbuf;
+};
+
+struct mthca_alloc_pd_resp {
+       __u32 pdn;
+       __u32 reserved;
+};
+
+struct mthca_create_cq {
+       __u64 cqnbuf;
+       __u32 lkey;
+       __u32 pdn;
+       __u64 arm_db_page;
+       __u64 set_db_page;
+       __u32 arm_db_index;
+       __u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+       __u32 cqn;
+       __u32 reserved;
+};
+
+struct mthca_create_qp {
+       __u32 lkey;
+       __u32 reserved;
+       __u64 sq_db_page;
+       __u64 rq_db_page;
+       __u32 sq_db_index;
+       __u32 rq_db_index;
+};
+
+#endif /* MTHCA_USER_H */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to