Add XRC support for working with file descriptors, to allow
sharing XRC domains between processes.

Signed-off-by: Jack Morgenstein <[EMAIL PROTECTED]>

Index: ofed_kernel/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/uverbs_cmd.c       2007-09-16 16:32:22.844587000 +0200
+++ ofed_kernel/drivers/infiniband/core/uverbs_cmd.c    2007-09-18 11:09:20.590991000 +0200
@@ -39,6 +39,7 @@
 #include <linux/fs.h>
 
 #include <asm/uaccess.h>
+#include <asm/fcntl.h>
 
 #include "uverbs.h"
 
@@ -252,14 +253,18 @@ static void put_srq_read(struct ib_srq *
        put_uobj_read(srq->uobject);
 }
 
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext 
*context)
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
+                                    struct ib_ucontext *context,
+                                    struct ib_uobject **uobj)
 {
-       return idr_read_obj(&ib_uverbs_xrc_domain_idr, xrcd_handle, context, 0);
+       *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle,
+                             context, 0);
+       return *uobj ? (*uobj)->object : NULL;
 }
 
-static void put_xrcd_read(struct ib_xrcd *xrcd)
+static void put_xrcd_read(struct ib_uobject *uobj)
 {
-       put_uobj_read(xrcd->uobject);
+       put_uobj_read(uobj);
 }
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -1036,6 +1041,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
        struct ib_qp                   *qp;
        struct ib_qp_init_attr          attr;
        struct ib_xrcd                 *xrcd;
+       struct ib_uobject               *xrcd_uobj;
        int ret;
 
        if (out_len < sizeof resp)
@@ -1058,7 +1064,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
        srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
                idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
        xrcd = (cmd.is_srq && cmd.qp_type == IB_QPT_XRC) ?
-               idr_read_xrcd(cmd.srq_handle, file->ucontext) : NULL;
+               idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : 
NULL;
        pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
        scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
        rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
@@ -1139,7 +1145,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv
        if (srq)
                put_srq_read(srq);
        if (xrcd)
-               put_xrcd_read(xrcd);
+               put_xrcd_read(xrcd_uobj);
 
        mutex_lock(&file->mutex);
        list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1167,7 +1173,7 @@ err_put:
        if (srq)
                put_srq_read(srq);
        if (xrcd)
-               put_xrcd_read(xrcd);
+               put_xrcd_read(xrcd_uobj);
 
        put_uobj_write(&obj->uevent.uobject);
        return ret;
@@ -2071,6 +2077,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct 
        struct ib_cq                    *xrc_cq;
        struct ib_xrcd                  *xrcd;
        struct ib_srq_init_attr          attr;
+       struct ib_uobject               *xrcd_uobj;
        int ret;
 
        if (out_len < sizeof resp)
@@ -2102,7 +2109,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct 
                goto err_put_pd;
        }
 
-       xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext);
+       xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
        if (!xrcd) {
                ret = -EINVAL;
                goto err_put_cq;
@@ -2153,7 +2160,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct 
                goto err_copy;
        }
 
-       put_xrcd_read(xrcd);
+       put_xrcd_read(xrcd_uobj);
        put_cq_read(xrc_cq);
        put_pd_read(pd);
 
@@ -2174,7 +2181,7 @@ err_destroy:
        ib_destroy_srq(srq);
 
 err_put:
-       put_xrcd_read(xrcd);
+       put_xrcd_read(xrcd_uobj);
 
 err_put_cq:
        put_cq_read(xrc_cq);
@@ -2306,6 +2313,117 @@ ssize_t ib_uverbs_destroy_srq(struct ib_
        return ret ? ret : in_len;
 }
 
+/*
+ * Look up the struct file for @fd with fget() and return the inode of its
+ * dentry.  Returns NULL when fget() finds no file for @fd.
+ *
+ * NOTE(review): the reference obtained by fget() is not dropped here, and
+ * no matching fput() is visible in this patch -- confirm who releases it.
+ */
+static struct inode *xrc_fd2inode(unsigned int fd)
+{
+       struct file *filp = fget(fd);
+
+       return filp ? filp->f_dentry->d_inode : NULL;
+}
+
+/* One node of the per-device rb-tree that maps an inode to its XRC domain. */
+struct xrcd_table_entry {
+       struct rb_node  node;   /* linkage into ib_device.ib_uverbs_xrcd_table */
+       struct inode   *inode;  /* search key; entries are ordered by pointer value */
+       struct ib_xrcd *xrcd;   /* XRC domain registered for the inode */
+};
+
+/*
+ * Add an (inode, xrcd) pair to @dev's XRC domain table.
+ *
+ * The table is an rb-tree ordered by the inode pointer value.  This helper
+ * only links the new entry; it does not take a reference on the inode.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or
+ * -EEXIST if the table already holds an entry for @i_n.
+ */
+static int xrcd_table_insert(struct ib_device *dev,
+                            struct inode *i_n,
+                            struct ib_xrcd *xrcd)
+{
+       struct rb_root *root = &dev->ib_uverbs_xrcd_table;
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct xrcd_table_entry *new_entry;
+
+       new_entry = kmalloc(sizeof *new_entry, GFP_KERNEL);
+       if (!new_entry)
+               return -ENOMEM;
+
+       new_entry->inode = i_n;
+       new_entry->xrcd = xrcd;
+
+       /* Walk down to the empty slot where the new node belongs. */
+       while (*link) {
+               struct xrcd_table_entry *cur;
+
+               parent = *link;
+               cur = rb_entry(parent, struct xrcd_table_entry, node);
+
+               if (i_n == cur->inode) {
+                       /* Key already present: discard the unused entry. */
+                       kfree(new_entry);
+                       return -EEXIST;
+               }
+
+               if (i_n < cur->inode)
+                       link = &parent->rb_left;
+               else
+                       link = &parent->rb_right;
+       }
+
+       rb_link_node(&new_entry->node, parent, link);
+       rb_insert_color(&new_entry->node, root);
+       return 0;
+}
+
+/*
+ * Record the inode -> XRC domain association in @dev's table and pin the
+ * inode with igrab() for as long as the entry exists (the matching iput()
+ * is in xrcd_table_delete()).
+ *
+ * The reference is taken before the entry is linked into the tree, and a
+ * failed igrab() is reported instead of ignored, so the table never holds
+ * an inode it has no reference on.
+ *
+ * Returns 0 on success, -EINVAL if igrab() could not take a reference, or
+ * the error from xrcd_table_insert() (-ENOMEM, -EEXIST).
+ */
+static int insert_xrcd(struct ib_device *dev, struct inode *i_n,
+                      struct ib_xrcd *xrcd)
+{
+       int ret;
+
+       if (!igrab(i_n))
+               return -EINVAL;
+
+       ret = xrcd_table_insert(dev, i_n, xrcd);
+       if (ret)
+               iput(i_n);      /* entry was not added: give the reference back */
+
+       return ret;
+}
+
+/*
+ * Find the table entry whose key is @i_n in @dev's XRC domain table.
+ *
+ * Descends the rb-tree comparing inode pointer values.  Returns the
+ * matching entry, or NULL when no entry for @i_n exists.
+ */
+static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev,
+                                                 struct inode *i_n)
+{
+       struct rb_node *cur = dev->ib_uverbs_xrcd_table.rb_node;
+
+       while (cur) {
+               struct xrcd_table_entry *entry =
+                       rb_entry(cur, struct xrcd_table_entry, node);
+
+               if (i_n == entry->inode)
+                       return entry;
+
+               if (i_n < entry->inode)
+                       cur = cur->rb_left;
+               else
+                       cur = cur->rb_right;
+       }
+
+       return NULL;
+}
+
+/*
+ * Fetch the XRC domain registered for @i_n on @dev.
+ *
+ * On a hit, stores the domain in *@xrcd and returns 0.  On a miss, returns
+ * -EINVAL and leaves *@xrcd unchanged.
+ */
+static int find_xrcd(struct ib_device *dev, struct inode *i_n,
+                    struct ib_xrcd **xrcd)
+{
+       struct xrcd_table_entry *match = xrcd_table_search(dev, i_n);
+
+       if (match) {
+               *xrcd = match->xrcd;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+
+/*
+ * Remove the entry for @i_n from @dev's XRC domain table, if there is one.
+ *
+ * Calls iput() on the inode, unlinks the entry from the rb-tree and frees
+ * it.  Does nothing when no entry for @i_n exists.
+ */
+static void xrcd_table_delete(struct ib_device *dev,
+                             struct inode *i_n)
+{
+       struct xrcd_table_entry *victim = xrcd_table_search(dev, i_n);
+
+       if (!victim)
+               return;
+
+       iput(i_n);
+       rb_erase(&victim->node, &dev->ib_uverbs_xrcd_table);
+       kfree(victim);
+}
+
 ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
                                  const char __user *buf, int in_len,
                                  int out_len)
@@ -2314,8 +2432,10 @@ ssize_t ib_uverbs_open_xrc_domain(struct
        struct ib_uverbs_open_xrc_domain_resp resp;
        struct ib_udata                  udata;
        struct ib_uobject               *uobj;
-       struct ib_xrcd                  *xrcd;
-       int                              ret;
+       struct ib_xrcd                  *xrcd = NULL;
+       struct inode                    *inode = NULL;
+       int                              ret = 0;
+       int                              new_xrcd = 0;
 
        if (out_len < sizeof resp)
                return -ENOSPC;
@@ -2323,35 +2443,55 @@ ssize_t ib_uverbs_open_xrc_domain(struct
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       /* file descriptors/inodes not yet implemented */
-       if (cmd.fd != (u32) (-1))
-               return -ENOSYS;
-
        INIT_UDATA(&udata, buf + sizeof cmd,
                   (unsigned long) cmd.response + sizeof resp,
                   in_len - sizeof cmd, out_len - sizeof resp);
 
+       mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
+       if (cmd.fd != (u32) (-1)) {
+               /* search for file descriptor */
+               inode = xrc_fd2inode(cmd.fd);
+               if (!inode) {
+                       ret = -EBADF;
+                       goto err_table_mutex_unlock;
+               }
+
+               ret = find_xrcd(file->device->ib_dev, inode, &xrcd);
+               if (ret && !(cmd.oflags & O_CREAT)) {
+                       /* no file descriptor. Need CREATE flag */
+                       ret = -EAGAIN;
+                       goto err_table_mutex_unlock;
+               }
+
+               if (xrcd && cmd.oflags & O_EXCL){
+                       ret = -EINVAL;
+                       goto err_table_mutex_unlock;
+               }
+       }
+
        uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
-       if (!uobj)
-               return -ENOMEM;
+       if (!uobj) {
+               ret = -ENOMEM;
+               goto err_table_mutex_unlock;
+       }
 
        init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
        down_write(&uobj->mutex);
 
-
-       xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
-                                               file->ucontext, &udata);
-       if (IS_ERR(xrcd)) {
-               ret = PTR_ERR(xrcd);
-               goto err;
+       if (!xrcd) {
+               xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+                                                       file->ucontext, &udata);
+               if (IS_ERR(xrcd)) {
+                       ret = PTR_ERR(xrcd);
+                       goto err;
+               }
+               xrcd->uobject = (cmd.fd == -1) ? uobj : NULL;
+               xrcd->inode = inode;
+               xrcd->device  = file->device->ib_dev;
+               atomic_set(&xrcd->usecnt, 0);
+               new_xrcd = 1;
        }
 
-       xrcd->fd = cmd.fd;
-       xrcd->flags = cmd.oflags;
-       xrcd->uobject = uobj;
-       xrcd->device  = file->device->ib_dev;
-       atomic_set(&xrcd->usecnt, 0);
-
        uobj->object = xrcd;
        ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj);
        if (ret)
@@ -2360,6 +2500,16 @@ ssize_t ib_uverbs_open_xrc_domain(struct
        memset(&resp, 0, sizeof resp);
        resp.xrcd_handle = uobj->id;
 
+       if (inode) {
+               if (new_xrcd) {
+               /* create new inode/xrcd table entry */
+                       ret = insert_xrcd(file->device->ib_dev, inode, xrcd);
+                       if (ret)
+                               goto err_insert_xrcd;
+               }
+               atomic_inc(&xrcd->usecnt);
+       }
+
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
                ret = -EFAULT;
@@ -2374,16 +2524,29 @@ ssize_t ib_uverbs_open_xrc_domain(struct
 
        up_write(&uobj->mutex);
 
+       mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
        return in_len;
 
 err_copy:
-       idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+
+       if (inode) {
+               if (new_xrcd)
+                       xrcd_table_delete(file->device->ib_dev, inode);
+               atomic_dec(&xrcd->usecnt);
+       }
+
+err_insert_xrcd:
+       idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
 
 err_idr:
        ib_dealloc_xrcd(xrcd);
 
 err:
        put_uobj_write(uobj);
+
+err_table_mutex_unlock:
+
+       mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
        return ret;
 }
 
@@ -2393,14 +2556,25 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 {
        struct ib_uverbs_close_xrc_domain cmd;
        struct ib_uobject          *uobj;
-       int                         ret;
+       struct ib_xrcd             *xrcd = NULL;
+       struct inode               *inode = NULL;
+       int                         ret = 0;
 
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
+       mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
        uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle, 
file->ucontext);
-       if (!uobj)
-               return -EINVAL;
+       if (!uobj) {
+               ret = -EINVAL;
+               goto err_unlock_mutex;
+       }
+
+       xrcd = (struct ib_xrcd *) (uobj->object);
+       inode = xrcd->inode;
+
+       if (inode)
+               atomic_dec(&xrcd->usecnt);
 
        ret = ib_dealloc_xrcd(uobj->object);
        if (!ret)
@@ -2408,8 +2582,11 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 
        put_uobj_write(uobj);
 
-       if (ret)
-               return ret;
+       if (ret && !inode)
+               goto err_unlock_mutex;
+
+       if (!ret && inode)
+               xrcd_table_delete(file->device->ib_dev, inode);
 
        idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
 
@@ -2419,6 +2596,27 @@ ssize_t ib_uverbs_close_xrc_domain(struc
 
        put_uobj(uobj);
 
+       mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
        return in_len;
+
+err_unlock_mutex:
+       mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
+       return ret;
 }
 
+/*
+ * Release one user of @xrcd and try to destroy the domain.
+ *
+ * For a domain that has an inode, its usecnt is decremented before
+ * ib_dealloc_xrcd() is called.  When ib_dealloc_xrcd() returns 0 and the
+ * domain had an inode, the inode's entry is removed from @ib_dev's table.
+ * A non-zero return from ib_dealloc_xrcd() is not reported to the caller.
+ *
+ * The inode pointer is read up front because @xrcd is not touched again
+ * after ib_dealloc_xrcd().
+ */
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+                                  struct ib_xrcd *xrcd)
+{
+       struct inode *inode = xrcd->inode;
+
+       if (inode)
+               atomic_dec(&xrcd->usecnt);
+
+       if (ib_dealloc_xrcd(xrcd))
+               return;
+
+       if (inode)
+               xrcd_table_delete(ib_dev, inode);
+}
+
+
Index: ofed_kernel/include/rdma/ib_verbs.h
===================================================================
--- ofed_kernel.orig/include/rdma/ib_verbs.h    2007-09-16 16:32:43.674747000 +0200
+++ ofed_kernel/include/rdma/ib_verbs.h 2007-09-17 12:31:55.239267000 +0200
@@ -52,6 +52,8 @@
 #include <asm/atomic.h>
 #include <asm/scatterlist.h>
 #include <asm/uaccess.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
 
 union ib_gid {
        u8      raw[16];
@@ -758,11 +760,8 @@ struct ib_pd {
 struct ib_xrcd {
        struct ib_device       *device;
        struct ib_uobject      *uobject;
-       struct rb_node          node;
-       u32                     xrc_domain_num;
        struct inode           *inode;
-       int                     fd;
-       u32                     flags;
+       struct rb_node          node;
        atomic_t                usecnt; /* count all resources */
 };
 
@@ -1084,6 +1083,8 @@ struct ib_device {
        __be64                       node_guid;
        u8                           node_type;
        u8                           phys_port_cnt;
+       struct rb_root               ib_uverbs_xrcd_table;
+       struct mutex                 xrcd_table_mutex;
 };
 
 struct ib_client {
Index: ofed_kernel/drivers/infiniband/core/device.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/device.c   2007-09-10 09:07:21.951463000 +0300
+++ ofed_kernel/drivers/infiniband/core/device.c        2007-09-17 15:06:09.213698000 +0200
@@ -290,6 +290,8 @@ int ib_register_device(struct ib_device 
        INIT_LIST_HEAD(&device->client_data_list);
        spin_lock_init(&device->event_handler_lock);
        spin_lock_init(&device->client_data_lock);
+       device->ib_uverbs_xrcd_table = RB_ROOT;
+       mutex_init(&device->xrcd_table_mutex);
 
        ret = read_port_table_lengths(device);
        if (ret) {
Index: ofed_kernel/drivers/infiniband/core/uverbs_main.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/uverbs_main.c      2007-09-16 16:32:22.000000000 +0200
+++ ofed_kernel/drivers/infiniband/core/uverbs_main.c   2007-09-17 15:17:28.552933000 +0200
@@ -243,13 +243,15 @@ static int ib_uverbs_cleanup_ucontext(st
                kfree(uobj);
        }
 
+       mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
        list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
                struct ib_xrcd *xrcd = uobj->object;
 
                idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj);
-               ib_dealloc_xrcd(xrcd);
+               ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd);
                kfree(uobj);
        }
+       mutex_unlock(&file->device->ib_dev->xrcd_table_mutex);
 
        list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
                struct ib_pd *pd = uobj->object;
Index: ofed_kernel/drivers/infiniband/core/uverbs.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/core/uverbs.h   2007-09-17 15:18:32.000000000 +0200
+++ ofed_kernel/drivers/infiniband/core/uverbs.h        2007-09-17 15:19:16.885160000 +0200
@@ -164,6 +164,8 @@ void ib_uverbs_qp_event_handler(struct i
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
+                           struct ib_xrcd *xrcd);
 
 #define IB_UVERBS_DECLARE_CMD(name)                                    \
        ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,           \
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to