Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f7c6a7b5d59980b076abbf2ceeb8735591290285
Commit:     f7c6a7b5d59980b076abbf2ceeb8735591290285
Parent:     36f021b579d195cdc5fa6f3e2bab198b4bf70643
Author:     Roland Dreier <[EMAIL PROTECTED]>
AuthorDate: Sun Mar 4 16:15:11 2007 -0800
Committer:  Roland Dreier <[EMAIL PROTECTED]>
CommitDate: Tue May 8 18:00:37 2007 -0700

    IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules
    
    Export ib_umem_get()/ib_umem_release() and put low-level drivers in
    control of when to call ib_umem_get() to pin and DMA map userspace memory,
    rather than always calling it in ib_uverbs_reg_mr() before calling the
    low-level driver's reg_user_mr method.
    
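    As a rough sketch of the new driver-side pattern (using a
    hypothetical "foo" driver whose foo_mr struct holds an ibmr and a
    umem pointer; the real per-driver conversions are in the diff
    below), a reg_user_mr method now pins the memory itself:
    
        static struct ib_mr *foo_reg_user_mr(struct ib_pd *pd, u64 start,
                                             u64 length, u64 virt, int acc,
                                             struct ib_udata *udata)
        {
                struct foo_mr *mr;
                int err;
    
                mr = kmalloc(sizeof *mr, GFP_KERNEL);
                if (!mr)
                        return ERR_PTR(-ENOMEM);
    
                /* The driver pins and DMA maps the region itself now. */
                mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
                if (IS_ERR(mr->umem)) {
                        err = PTR_ERR(mr->umem);
                        kfree(mr);
                        return ERR_PTR(err);
                }
    
                /* ... device-specific registration using mr->umem ... */
    
                return &mr->ibmr;
        }
    
    The matching dereg_mr method then calls ib_umem_release(mr->umem)
    before freeing the mr, as each converted driver below does.
    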
    Also move these functions to be in the ib_core module instead of
    ib_uverbs, so that driver modules using them do not depend on
    ib_uverbs.
    
    This has a number of advantages:
     - It is better design from the standpoint of making generic code a
       library that can be used or overridden by device-specific code as
       the details of specific devices dictate.
     - Drivers that do not need to pin userspace memory regions do not
       need to take the performance hit of calling ib_umem_get().  For
       example, although I have not tried to implement it in this patch,
       the ipath driver should be able to avoid pinning memory and just
       use copy_{to,from}_user() to access userspace memory regions.
     - Buffers that need special mapping treatment can be identified by
       the low-level driver.  For example, it may be possible to solve
       some Altix-specific memory ordering issues with mthca CQs in
       userspace by mapping CQ buffers with extra flags.
     - Drivers that need to pin and DMA map userspace memory for things
       other than memory regions can use ib_umem_get() directly, instead
       of hacks using extra parameters to their reg_phys_mr method.  For
       example, the mlx4 driver that is pending being merged needs to pin
       and DMA map QP and CQ buffers, but it does not need to create a
       memory key for these buffers.  So the cleanest solution is for mlx4
       to call ib_umem_get() in the create_qp and create_cq methods (a
       sketch follows below).
    
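    For instance (a hypothetical sketch only, since mlx4 is not yet
    merged and these names are invented), a create_cq method could pin
    its userspace buffer without ever allocating a memory key:
    
        cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
                               IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(cq->umem))
                return PTR_ERR(cq->umem);
    
        /* ... program the HCA with the DMA addresses from cq->umem;
         * no ib_mr and no lkey/rkey are created ... */
    
        /* and later, in destroy_cq: */
        ib_umem_release(cq->umem);
    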
    Signed-off-by: Roland Dreier <[EMAIL PROTECTED]>
---
 drivers/infiniband/Kconfig                   |    5 +
 drivers/infiniband/core/Makefile             |    4 +-
 drivers/infiniband/core/device.c             |    2 +
 drivers/infiniband/core/umem.c               |  273 ++++++++++++++++++++++++++
 drivers/infiniband/core/uverbs.h             |    6 +-
 drivers/infiniband/core/uverbs_cmd.c         |   60 ++----
 drivers/infiniband/core/uverbs_main.c        |   11 +-
 drivers/infiniband/core/uverbs_mem.c         |  225 ---------------------
 drivers/infiniband/hw/amso1100/c2_provider.c |   42 +++--
 drivers/infiniband/hw/amso1100/c2_provider.h |    1 +
 drivers/infiniband/hw/cxgb3/iwch_provider.c  |   28 ++-
 drivers/infiniband/hw/cxgb3/iwch_provider.h  |    1 +
 drivers/infiniband/hw/ehca/ehca_classes.h    |    1 +
 drivers/infiniband/hw/ehca/ehca_iverbs.h     |    3 +-
 drivers/infiniband/hw/ehca/ehca_mrmw.c       |   69 ++++---
 drivers/infiniband/hw/ipath/ipath_mr.c       |   38 +++-
 drivers/infiniband/hw/ipath/ipath_verbs.h    |    5 +-
 drivers/infiniband/hw/mthca/mthca_provider.c |   38 +++-
 drivers/infiniband/hw/mthca/mthca_provider.h |    1 +
 include/rdma/ib_umem.h                       |   78 ++++++++
 include/rdma/ib_verbs.h                      |   28 +---
 21 files changed, 536 insertions(+), 383 deletions(-)

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66b36de..82afba5 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
          libibverbs, libibcm and a hardware driver library from
          <http://www.openib.org>.
 
+config INFINIBAND_USER_MEM
+       bool
+       depends on INFINIBAND_USER_ACCESS != n
+       default y
+
 config INFINIBAND_ADDR_TRANS
        bool
        depends on INFINIBAND && INET
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4..cb1ab3e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
 
 ib_core-y :=                   packer.o ud_header.o verbs.o sysfs.o \
                                device.o fmr_pool.o cache.o
+ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 
 ib_mad-y :=                    mad.o smi.o agent.o mad_rmpp.o
 
@@ -28,5 +29,4 @@ ib_umad-y :=                  user_mad.o
 
 ib_ucm-y :=                    ucm.o
 
-ib_uverbs-y :=                 uverbs_main.o uverbs_cmd.o uverbs_mem.o \
-                               uverbs_marshall.o
+ib_uverbs-y :=                 uverbs_main.o uverbs_cmd.o uverbs_marshall.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb42..592c90a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
 {
        ib_cache_cleanup();
        ib_sysfs_cleanup();
+       /* Make sure that any pending umem accounting work is done. */
+       flush_scheduled_work();
 }
 
 module_init(ib_core_init);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
new file mode 100644
index 0000000..48e854c
--- /dev/null
+++ b/drivers/infiniband/core/umem.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+
+#include "uverbs.h"
+
+struct ib_umem_account_work {
+       struct work_struct work;
+       struct mm_struct  *mm;
+       unsigned long      diff;
+};
+
+
+static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
+{
+       struct ib_umem_chunk *chunk, *tmp;
+       int i;
+
+       list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
+               ib_dma_unmap_sg(dev, chunk->page_list,
+                               chunk->nents, DMA_BIDIRECTIONAL);
+               for (i = 0; i < chunk->nents; ++i) {
+                       if (umem->writable && dirty)
+                               set_page_dirty_lock(chunk->page_list[i].page);
+                       put_page(chunk->page_list[i].page);
+               }
+
+               kfree(chunk);
+       }
+}
+
+/**
+ * ib_umem_get - Pin and DMA map userspace memory.
+ * @context: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ */
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+                           size_t size, int access)
+{
+       struct ib_umem *umem;
+       struct page **page_list;
+       struct ib_umem_chunk *chunk;
+       unsigned long locked;
+       unsigned long lock_limit;
+       unsigned long cur_base;
+       unsigned long npages;
+       int ret;
+       int off;
+       int i;
+
+       if (!can_do_mlock())
+               return ERR_PTR(-EPERM);
+
+       umem = kmalloc(sizeof *umem, GFP_KERNEL);
+       if (!umem)
+               return ERR_PTR(-ENOMEM);
+
+       umem->context   = context;
+       umem->length    = size;
+       umem->offset    = addr & ~PAGE_MASK;
+       umem->page_size = PAGE_SIZE;
+       /*
+        * We ask for writable memory if any access flags other than
+        * "remote read" are set.  "Local write" and "remote write"
+        * obviously require write access.  "Remote atomic" can do
+        * things like fetch and add, which will modify memory, and
+        * "MW bind" can change permissions by binding a window.
+        */
+       umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
+
+       INIT_LIST_HEAD(&umem->chunk_list);
+
+       page_list = (struct page **) __get_free_page(GFP_KERNEL);
+       if (!page_list) {
+               kfree(umem);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+
+       down_write(&current->mm->mmap_sem);
+
+       locked     = npages + current->mm->locked_vm;
+       lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+
+       if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       cur_base = addr & PAGE_MASK;
+
+       while (npages) {
+               ret = get_user_pages(current, current->mm, cur_base,
+                                    min_t(int, npages,
+                                          PAGE_SIZE / sizeof (struct page *)),
+                                    1, !umem->writable, page_list, NULL);
+
+               if (ret < 0)
+                       goto out;
+
+               cur_base += ret * PAGE_SIZE;
+               npages   -= ret;
+
+               off = 0;
+
+               while (ret) {
+                       chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
+                                       min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
+                                       GFP_KERNEL);
+                       if (!chunk) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+
+                       chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
+                       for (i = 0; i < chunk->nents; ++i) {
+                               chunk->page_list[i].page   = page_list[i + off];
+                               chunk->page_list[i].offset = 0;
+                               chunk->page_list[i].length = PAGE_SIZE;
+                       }
+
+                       chunk->nmap = ib_dma_map_sg(context->device,
+                                                   &chunk->page_list[0],
+                                                   chunk->nents,
+                                                   DMA_BIDIRECTIONAL);
+                       if (chunk->nmap <= 0) {
+                               for (i = 0; i < chunk->nents; ++i)
+                                       put_page(chunk->page_list[i].page);
+                               kfree(chunk);
+
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+
+                       ret -= chunk->nents;
+                       off += chunk->nents;
+                       list_add_tail(&chunk->list, &umem->chunk_list);
+               }
+
+               ret = 0;
+       }
+
+out:
+       if (ret < 0) {
+               __ib_umem_release(context->device, umem, 0);
+               kfree(umem);
+       } else
+               current->mm->locked_vm = locked;
+
+       up_write(&current->mm->mmap_sem);
+       free_page((unsigned long) page_list);
+
+       return ret < 0 ? ERR_PTR(ret) : umem;
+}
+EXPORT_SYMBOL(ib_umem_get);
+
+static void ib_umem_account(struct work_struct *_work)
+{
+       struct ib_umem_account_work *work =
+               container_of(_work, struct ib_umem_account_work, work);
+
+       down_write(&work->mm->mmap_sem);
+       work->mm->locked_vm -= work->diff;
+       up_write(&work->mm->mmap_sem);
+       mmput(work->mm);
+       kfree(work);
+}
+
+/**
+ * ib_umem_release - release memory pinned with ib_umem_get
+ * @umem: umem struct to release
+ */
+void ib_umem_release(struct ib_umem *umem)
+{
+       struct ib_umem_account_work *work;
+       struct ib_ucontext *context = umem->context;
+       struct mm_struct *mm;
+       unsigned long diff;
+
+       __ib_umem_release(umem->context->device, umem, 1);
+
+       mm = get_task_mm(current);
+       if (!mm)
+               return;
+
+       diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+       kfree(umem);
+
+       /*
+        * We may be called with the mm's mmap_sem already held.  This
+        * can happen when a userspace munmap() is the call that drops
+        * the last reference to our file and calls our release
+        * method.  If there are memory regions to destroy, we'll end
+        * up here and not be able to take the mmap_sem.  In that case
+        * we defer the vm_locked accounting to the system workqueue.
+        */
+       if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
+               work = kmalloc(sizeof *work, GFP_KERNEL);
+               if (!work) {
+                       mmput(mm);
+                       return;
+               }
+
+               INIT_WORK(&work->work, ib_umem_account);
+               work->mm   = mm;
+               work->diff = diff;
+
+               schedule_work(&work->work);
+               return;
+       } else
+               down_write(&mm->mmap_sem);
+
+       current->mm->locked_vm -= diff;
+       up_write(&mm->mmap_sem);
+       mmput(mm);
+}
+EXPORT_SYMBOL(ib_umem_release);
+
+int ib_umem_page_count(struct ib_umem *umem)
+{
+       struct ib_umem_chunk *chunk;
+       int shift;
+       int i;
+       int n;
+
+       shift = ilog2(umem->page_size);
+
+       n = 0;
+       list_for_each_entry(chunk, &umem->chunk_list, list)
+               for (i = 0; i < chunk->nmap; ++i)
+                       n += sg_dma_len(&chunk->page_list[i]) >> shift;
+
+       return n;
+}
+EXPORT_SYMBOL(ib_umem_page_count);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c..c33546f 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
 #include <linux/completion.h>
 
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
 /*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
                             struct ib_event *event);
 
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
-               void *addr, size_t size, int write);
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
-
 #define IB_UVERBS_DECLARE_CMD(name)                                    \
        ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,           \
                                 const char __user *buf, int in_len,    \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bab6676..01d7008 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  * Copyright (c) 2006 Mellanox Technologies.  All rights reserved.
  *
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
        INIT_LIST_HEAD(&ucontext->qp_list);
        INIT_LIST_HEAD(&ucontext->srq_list);
        INIT_LIST_HEAD(&ucontext->ah_list);
+       ucontext->closing = 0;
 
        resp.num_comp_vectors = file->device->num_comp_vectors;
 
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        struct ib_uverbs_reg_mr      cmd;
        struct ib_uverbs_reg_mr_resp resp;
        struct ib_udata              udata;
-       struct ib_umem_object       *obj;
+       struct ib_uobject           *uobj;
        struct ib_pd                *pd;
        struct ib_mr                *mr;
        int                          ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
            !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
                return -EINVAL;
 
-       obj = kmalloc(sizeof *obj, GFP_KERNEL);
-       if (!obj)
+       uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+       if (!uobj)
                return -ENOMEM;
 
-       init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key);
-       down_write(&obj->uobject.mutex);
-
-       /*
-        * We ask for writable memory if any access flags other than
-        * "remote read" are set.  "Local write" and "remote write"
-        * obviously require write access.  "Remote atomic" can do
-        * things like fetch and add, which will modify memory, and
-        * "MW bind" can change permissions by binding a window.
-        */
-       ret = ib_umem_get(file->device->ib_dev, &obj->umem,
-                         (void *) (unsigned long) cmd.start, cmd.length,
-                         !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
-       if (ret)
-               goto err_free;
-
-       obj->umem.virt_base = cmd.hca_va;
+       init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
+       down_write(&uobj->mutex);
 
        pd = idr_read_pd(cmd.pd_handle, file->ucontext);
        if (!pd) {
                ret = -EINVAL;
-               goto err_release;
+               goto err_free;
        }
 
-       mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata);
+       mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+                                    cmd.access_flags, &udata);
        if (IS_ERR(mr)) {
                ret = PTR_ERR(mr);
                goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
        mr->device  = pd->device;
        mr->pd      = pd;
-       mr->uobject = &obj->uobject;
+       mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
        atomic_set(&mr->usecnt, 0);
 
-       obj->uobject.object = mr;
-       ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+       uobj->object = mr;
+       ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
        if (ret)
                goto err_unreg;
 
        memset(&resp, 0, sizeof resp);
        resp.lkey      = mr->lkey;
        resp.rkey      = mr->rkey;
-       resp.mr_handle = obj->uobject.id;
+       resp.mr_handle = uobj->id;
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        put_pd_read(pd);
 
        mutex_lock(&file->mutex);
-       list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+       list_add_tail(&uobj->list, &file->ucontext->mr_list);
        mutex_unlock(&file->mutex);
 
-       obj->uobject.live = 1;
+       uobj->live = 1;
 
-       up_write(&obj->uobject.mutex);
+       up_write(&uobj->mutex);
 
        return in_len;
 
 err_copy:
-       idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject);
+       idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
 
 err_unreg:
        ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ err_unreg:
 err_put:
        put_pd_read(pd);
 
-err_release:
-       ib_umem_release(file->device->ib_dev, &obj->umem);
-
 err_free:
-       put_uobj_write(&obj->uobject);
+       put_uobj_write(uobj);
        return ret;
 }
 
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
        struct ib_uverbs_dereg_mr cmd;
        struct ib_mr             *mr;
        struct ib_uobject        *uobj;
-       struct ib_umem_object    *memobj;
        int                       ret = -EINVAL;
 
        if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
        if (!uobj)
                return -EINVAL;
 
-       memobj = container_of(uobj, struct ib_umem_object, uobject);
-       mr     = uobj->object;
+       mr = uobj->object;
 
        ret = ib_dereg_mr(mr);
        if (!ret)
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
        list_del(&uobj->list);
        mutex_unlock(&file->mutex);
 
-       ib_umem_release(file->device->ib_dev, &memobj->umem);
-
        put_uobj(uobj);
 
        return in_len;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d44e547..14d7ccd 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
        if (!context)
                return 0;
 
+       context->closing = 1;
+
        list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
                struct ib_ah *ah = uobj->object;
 
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
        list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
                struct ib_mr *mr = uobj->object;
-               struct ib_device *mrdev = mr->device;
-               struct ib_umem_object *memobj;
 
                idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
                ib_dereg_mr(mr);
-
-               memobj = container_of(uobj, struct ib_umem_object, uobject);
-               ib_umem_release_on_close(mrdev, &memobj->umem);
-
-               kfree(memobj);
+               kfree(uobj);
        }
 
        list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
        unregister_filesystem(&uverbs_event_fs);
        class_destroy(uverbs_class);
        unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
-       flush_scheduled_work();
        idr_destroy(&ib_uverbs_pd_idr);
        idr_destroy(&ib_uverbs_mr_idr);
        idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c
deleted file mode 100644
index c95fe95..0000000
--- a/drivers/infiniband/core/uverbs_mem.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Cisco Systems.  All rights reserved.
- * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $
- */
-
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-
-#include "uverbs.h"
-
-struct ib_umem_account_work {
-       struct work_struct work;
-       struct mm_struct  *mm;
-       unsigned long      diff;
-};
-
-
-static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
-{
-       struct ib_umem_chunk *chunk, *tmp;
-       int i;
-
-       list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
-               ib_dma_unmap_sg(dev, chunk->page_list,
-                               chunk->nents, DMA_BIDIRECTIONAL);
-               for (i = 0; i < chunk->nents; ++i) {
-                       if (umem->writable && dirty)
-                               set_page_dirty_lock(chunk->page_list[i].page);
-                       put_page(chunk->page_list[i].page);
-               }
-
-               kfree(chunk);
-       }
-}
-
-int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
-               void *addr, size_t size, int write)
-{
-       struct page **page_list;
-       struct ib_umem_chunk *chunk;
-       unsigned long locked;
-       unsigned long lock_limit;
-       unsigned long cur_base;
-       unsigned long npages;
-       int ret = 0;
-       int off;
-       int i;
-
-       if (!can_do_mlock())
-               return -EPERM;
-
-       page_list = (struct page **) __get_free_page(GFP_KERNEL);
-       if (!page_list)
-               return -ENOMEM;
-
-       mem->user_base = (unsigned long) addr;
-       mem->length    = size;
-       mem->offset    = (unsigned long) addr & ~PAGE_MASK;
-       mem->page_size = PAGE_SIZE;
-       mem->writable  = write;
-
-       INIT_LIST_HEAD(&mem->chunk_list);
-
-       npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT;
-
-       down_write(&current->mm->mmap_sem);
-
-       locked     = npages + current->mm->locked_vm;
-       lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-
-       if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       cur_base = (unsigned long) addr & PAGE_MASK;
-
-       while (npages) {
-               ret = get_user_pages(current, current->mm, cur_base,
-                                    min_t(int, npages,
-                                          PAGE_SIZE / sizeof (struct page *)),
-                                    1, !write, page_list, NULL);
-
-               if (ret < 0)
-                       goto out;
-
-               cur_base += ret * PAGE_SIZE;
-               npages   -= ret;
-
-               off = 0;
-
-               while (ret) {
-                       chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
-                                       min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
-                                       GFP_KERNEL);
-                       if (!chunk) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-
-                       chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
-                       for (i = 0; i < chunk->nents; ++i) {
-                               chunk->page_list[i].page   = page_list[i + off];
-                               chunk->page_list[i].offset = 0;
-                               chunk->page_list[i].length = PAGE_SIZE;
-                       }
-
-                       chunk->nmap = ib_dma_map_sg(dev,
-                                                   &chunk->page_list[0],
-                                                   chunk->nents,
-                                                   DMA_BIDIRECTIONAL);
-                       if (chunk->nmap <= 0) {
-                               for (i = 0; i < chunk->nents; ++i)
-                                       put_page(chunk->page_list[i].page);
-                               kfree(chunk);
-
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-
-                       ret -= chunk->nents;
-                       off += chunk->nents;
-                       list_add_tail(&chunk->list, &mem->chunk_list);
-               }
-
-               ret = 0;
-       }
-
-out:
-       if (ret < 0)
-               __ib_umem_release(dev, mem, 0);
-       else
-               current->mm->locked_vm = locked;
-
-       up_write(&current->mm->mmap_sem);
-       free_page((unsigned long) page_list);
-
-       return ret;
-}
-
-void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
-{
-       __ib_umem_release(dev, umem, 1);
-
-       down_write(&current->mm->mmap_sem);
-       current->mm->locked_vm -=
-               PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
-       up_write(&current->mm->mmap_sem);
-}
-
-static void ib_umem_account(struct work_struct *_work)
-{
-       struct ib_umem_account_work *work =
-               container_of(_work, struct ib_umem_account_work, work);
-
-       down_write(&work->mm->mmap_sem);
-       work->mm->locked_vm -= work->diff;
-       up_write(&work->mm->mmap_sem);
-       mmput(work->mm);
-       kfree(work);
-}
-
-void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem)
-{
-       struct ib_umem_account_work *work;
-       struct mm_struct *mm;
-
-       __ib_umem_release(dev, umem, 1);
-
-       mm = get_task_mm(current);
-       if (!mm)
-               return;
-
-       /*
-        * We may be called with the mm's mmap_sem already held.  This
-        * can happen when a userspace munmap() is the call that drops
-        * the last reference to our file and calls our release
-        * method.  If there are memory regions to destroy, we'll end
-        * up here and not be able to take the mmap_sem.  Therefore we
-        * defer the vm_locked accounting to the system workqueue.
-        */
-
-       work = kmalloc(sizeof *work, GFP_KERNEL);
-       if (!work) {
-               mmput(mm);
-               return;
-       }
-
-       INIT_WORK(&work->work, ib_umem_account);
-       work->mm   = mm;
-       work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
-
-       schedule_work(&work->work);
-}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 1091662..997cf15 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
 #include <asm/byteorder.h>
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 #include "c2.h"
 #include "c2_provider.h"
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
        }
 
        mr->pd = to_c2pd(ib_pd);
+       mr->umem = NULL;
        pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
                "*iova_start %llx, first pa %llx, last pa %llx\n",
                __FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
        return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
 }
 
-static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-                                   int acc, struct ib_udata *udata)
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                                   u64 virt, int acc, struct ib_udata *udata)
 {
        u64 *pages;
        u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        struct c2_mr *c2mr;
 
        pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
-       shift = ffs(region->page_size) - 1;
 
        c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
        if (!c2mr)
                return ERR_PTR(-ENOMEM);
        c2mr->pd = c2pd;
 
+       c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+       if (IS_ERR(c2mr->umem)) {
+               err = PTR_ERR(c2mr->umem);
+               kfree(c2mr);
+               return ERR_PTR(err);
+       }
+
+       shift = ffs(c2mr->umem->page_size) - 1;
+
        n = 0;
-       list_for_each_entry(chunk, &region->chunk_list, list)
+       list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
                n += chunk->nents;
 
        pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        }
 
        i = 0;
-       list_for_each_entry(chunk, &region->chunk_list, list) {
+       list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
                for (j = 0; j < chunk->nmap; ++j) {
                        len = sg_dma_len(&chunk->page_list[j]) >> shift;
                        for (k = 0; k < len; ++k) {
                                pages[i++] =
                                        sg_dma_address(&chunk->page_list[j]) +
-                                       (region->page_size * k);
+                                       (c2mr->umem->page_size * k);
                        }
                }
        }
 
-       kva = (u64)region->virt_base;
+       kva = virt;
        err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
                                         pages,
-                                        region->page_size,
+                                        c2mr->umem->page_size,
                                         i,
-                                        region->length,
-                                        region->offset,
+                                        length,
+                                        c2mr->umem->offset,
                                         &kva,
                                         c2_convert_access(acc),
                                         c2mr);
        kfree(pages);
-       if (err) {
-               kfree(c2mr);
-               return ERR_PTR(err);
-       }
+       if (err)
+               goto err;
        return &c2mr->ibmr;
 
 err:
+       ib_umem_release(c2mr->umem);
        kfree(c2mr);
        return ERR_PTR(err);
 }
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
        err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
        if (err)
                pr_debug("c2_stag_dealloc failed: %d\n", err);
-       else
+       else {
+               if (mr->umem)
+                       ib_umem_release(mr->umem);
                kfree(mr);
+       }
 
        return err;
 }
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc90622..1076df2 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
 struct c2_mr {
        struct ib_mr ibmr;
        struct c2_pd *pd;
+       struct ib_umem *umem;
 };
 
 struct c2_av;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index a891493..e7c2c39 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
 #include <rdma/iw_cm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 
 #include "cxio_hal.h"
@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
        remove_handle(rhp, &rhp->mmidr, mmid);
        if (mhp->kva)
                kfree((void *) (unsigned long) mhp->kva);
+       if (mhp->umem)
+               ib_umem_release(mhp->umem);
        PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
        kfree(mhp);
        return 0;
@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
 }
 
 
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-                                     int acc, struct ib_udata *udata)
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                                     u64 virt, int acc, struct ib_udata *udata)
 {
        __be64 *pages;
        int shift, n, len;
@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        struct iwch_reg_user_mr_resp uresp;
 
        PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
-       shift = ffs(region->page_size) - 1;
 
        php = to_iwch_pd(pd);
        rhp = php->rhp;
@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        if (!mhp)
                return ERR_PTR(-ENOMEM);
 
+       mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+       if (IS_ERR(mhp->umem)) {
+               err = PTR_ERR(mhp->umem);
+               kfree(mhp);
+               return ERR_PTR(err);
+       }
+
+       shift = ffs(mhp->umem->page_size) - 1;
+
        n = 0;
-       list_for_each_entry(chunk, &region->chunk_list, list)
+       list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
                n += chunk->nents;
 
        pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
        i = n = 0;
 
-       list_for_each_entry(chunk, &region->chunk_list, list)
+       list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
                for (j = 0; j < chunk->nmap; ++j) {
                        len = sg_dma_len(&chunk->page_list[j]) >> shift;
                        for (k = 0; k < len; ++k) {
                                pages[i++] = cpu_to_be64(sg_dma_address(
                                        &chunk->page_list[j]) +
-                                       region->page_size * k);
+                                       mhp->umem->page_size * k);
                        }
                }
 
@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        mhp->attr.pdid = php->pdid;
        mhp->attr.zbva = 0;
        mhp->attr.perms = iwch_ib_to_tpt_access(acc);
-       mhp->attr.va_fbo = region->virt_base;
+       mhp->attr.va_fbo = virt;
        mhp->attr.page_size = shift - 12;
-       mhp->attr.len = (u32) region->length;
+       mhp->attr.len = (u32) length;
        mhp->attr.pbl_size = i;
        err = iwch_register_mem(rhp, php, mhp, shift, pages);
        kfree(pages);
@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        return &mhp->ibmr;
 
 err:
+       ib_umem_release(mhp->umem);
        kfree(mhp);
        return ERR_PTR(err);
 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56..48833f3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
 
 struct iwch_mr {
        struct ib_mr ibmr;
+       struct ib_umem *umem;
        struct iwch_dev *rhp;
        u64 kva;
        struct tpt_attributes attr;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 10fb8fb..f64d42b 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -176,6 +176,7 @@ struct ehca_mr {
                struct ib_mr ib_mr;     /* must always be first in ehca_mr */
                struct ib_fmr ib_fmr;   /* must always be first in ehca_mr */
        } ib;
+       struct ib_umem *umem;
        spinlock_t mrlock;
 
        enum ehca_mr_flag flags;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index e14b029..37e7fe0 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
                               int num_phys_buf,
                               int mr_access_flags, u64 *iova_start);
 
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
-                              struct ib_umem *region,
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
                               int mr_access_flags, struct ib_udata *udata);
 
 int ehca_rereg_phys_mr(struct ib_mr *mr,
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab56..84c5bb4 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <rdma/ib_umem.h>
+
 #include <asm/current.h>
 
 #include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
 
 /*----------------------------------------------------------------------*/
 
-struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
-                              struct ib_umem *region,
-                              int mr_access_flags,
-                              struct ib_udata *udata)
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
+                              int mr_access_flags, struct ib_udata *udata)
 {
        struct ib_mr *ib_mr;
        struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
                ehca_gen_err("bad pd=%p", pd);
                return ERR_PTR(-EFAULT);
        }
-       if (!region) {
-               ehca_err(pd->device, "bad input values: region=%p", region);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
+
        if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
                ib_mr = ERR_PTR(-EINVAL);
                goto reg_user_mr_exit0;
        }
-       if (region->page_size != PAGE_SIZE) {
-               ehca_err(pd->device, "page size not supported, "
-                        "region->page_size=%x", region->page_size);
-               ib_mr = ERR_PTR(-EINVAL);
-               goto reg_user_mr_exit0;
-       }
 
-       if ((region->length == 0) ||
-           ((region->virt_base + region->length) < region->virt_base)) {
+       if (length == 0 || virt + length < virt) {
                ehca_err(pd->device, "bad input values: length=%lx "
-                        "virt_base=%lx", region->length, region->virt_base);
+                        "virt_base=%lx", length, virt);
                ib_mr = ERR_PTR(-EINVAL);
                goto reg_user_mr_exit0;
        }
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
                goto reg_user_mr_exit0;
        }
 
+       e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+                                mr_access_flags);
+       if (IS_ERR(e_mr->umem)) {
+               ib_mr = (void *) e_mr->umem;
+               goto reg_user_mr_exit1;
+       }
+
+       if (e_mr->umem->page_size != PAGE_SIZE) {
+               ehca_err(pd->device, "page size not supported, "
+                        "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+               ib_mr = ERR_PTR(-EINVAL);
+               goto reg_user_mr_exit2;
+       }
+
        /* determine number of MR pages */
-       num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
-                        PAGE_SIZE - 1) / PAGE_SIZE);
-       num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
-                        EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
+       num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
+                       PAGE_SIZE);
+       num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
+                       EHCA_PAGESIZE);
 
        /* register MR on HCA */
        pginfo.type       = EHCA_MR_PGI_USER;
        pginfo.num_pages  = num_pages_mr;
        pginfo.num_4k     = num_pages_4k;
-       pginfo.region     = region;
-       pginfo.next_4k    = region->offset / EHCA_PAGESIZE;
+       pginfo.region     = e_mr->umem;
+       pginfo.next_4k    = e_mr->umem->offset / EHCA_PAGESIZE;
        pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
-                                              (&region->chunk_list),
+                                              (&e_mr->umem->chunk_list),
                                               list);
 
-       ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
-                         region->length, mr_access_flags, e_pd, &pginfo,
-                         &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
+       ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
+                         &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
        if (ret) {
                ib_mr = ERR_PTR(ret);
-               goto reg_user_mr_exit1;
+               goto reg_user_mr_exit2;
        }
 
        /* successful registration of all pages */
        return &e_mr->ib.ib_mr;
 
+reg_user_mr_exit2:
+       ib_umem_release(e_mr->umem);
 reg_user_mr_exit1:
        ehca_mr_delete(e_mr);
 reg_user_mr_exit0:
        if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
+               ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
                         " udata=%p",
-                        PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
+                        PTR_ERR(ib_mr), pd, mr_access_flags, udata);
        return ib_mr;
 } /* end ehca_reg_user_mr() */
 
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
                goto dereg_mr_exit0;
        }
 
+       if (e_mr->umem)
+               ib_umem_release(e_mr->umem);
+
        /* successful deregistration */
        ehca_mr_delete(e_mr);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 31e7073..bdeef8d 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/ib_umem.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_smi.h>
 
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
        mr->mr.offset = 0;
        mr->mr.access_flags = acc;
        mr->mr.max_segs = num_phys_buf;
+       mr->umem = NULL;
 
        m = 0;
        n = 0;
@@ -170,46 +172,56 @@ bail:
 /**
  * ipath_reg_user_mr - register a userspace memory region
  * @pd: protection domain for this memory region
- * @region: the user memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
  * @mr_access_flags: access flags for this memory region
  * @udata: unused by the InfiniPath driver
  *
  * Returns the memory region on success, otherwise returns an errno.
  */
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-                               int mr_access_flags, struct ib_udata *udata)
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                               u64 virt_addr, int mr_access_flags,
+                               struct ib_udata *udata)
 {
        struct ipath_mr *mr;
+       struct ib_umem *umem;
        struct ib_umem_chunk *chunk;
        int n, m, i;
        struct ib_mr *ret;
 
-       if (region->length == 0) {
+       if (length == 0) {
                ret = ERR_PTR(-EINVAL);
                goto bail;
        }
 
+       umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
+       if (IS_ERR(umem))
+               return (void *) umem;
+
        n = 0;
-       list_for_each_entry(chunk, &region->chunk_list, list)
+       list_for_each_entry(chunk, &umem->chunk_list, list)
                n += chunk->nents;
 
        mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
        if (!mr) {
                ret = ERR_PTR(-ENOMEM);
+               ib_umem_release(umem);
                goto bail;
        }
 
        mr->mr.pd = pd;
-       mr->mr.user_base = region->user_base;
-       mr->mr.iova = region->virt_base;
-       mr->mr.length = region->length;
-       mr->mr.offset = region->offset;
+       mr->mr.user_base = start;
+       mr->mr.iova = virt_addr;
+       mr->mr.length = length;
+       mr->mr.offset = umem->offset;
        mr->mr.access_flags = mr_access_flags;
        mr->mr.max_segs = n;
+       mr->umem = umem;
 
        m = 0;
        n = 0;
-       list_for_each_entry(chunk, &region->chunk_list, list) {
+       list_for_each_entry(chunk, &umem->chunk_list, list) {
                for (i = 0; i < chunk->nents; i++) {
                        void *vaddr;
 
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
                                goto bail;
                        }
                        mr->mr.map[m]->segs[n].vaddr = vaddr;
-                       mr->mr.map[m]->segs[n].length = region->page_size;
+                       mr->mr.map[m]->segs[n].length = umem->page_size;
                        n++;
                        if (n == IPATH_SEGSZ) {
                                m++;
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
                i--;
                kfree(mr->mr.map[i]);
        }
+
+       if (mr->umem)
+               ib_umem_release(mr->umem);
+
        kfree(mr);
        return 0;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7064fc2..088b837 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -251,6 +251,7 @@ struct ipath_sge {
 /* Memory region */
 struct ipath_mr {
        struct ib_mr ibmr;
+       struct ib_umem *umem;
        struct ipath_mregion mr;        /* must be last */
 };
 
@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
                                struct ib_phys_buf *buffer_list,
                                int num_phys_buf, int acc, u64 *iova_start);
 
-struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-                               int mr_access_flags,
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                               u64 virt_addr, int mr_access_flags,
                                struct ib_udata *udata);
 
 int ipath_dereg_mr(struct ib_mr *ibmr);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 1c05486..6bcde1c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
 #include <linux/mm.h>
 
@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
                return ERR_PTR(err);
        }
 
+       mr->umem = NULL;
+
        return &mr->ibmr;
 }
 
@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
        }
 
        kfree(page_list);
+       mr->umem = NULL;
+
        return &mr->ibmr;
 }
 
-static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
-                                      int acc, struct ib_udata *udata)
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+                                      u64 virt, int acc, struct ib_udata *udata)
 {
        struct mthca_dev *dev = to_mdev(pd->device);
        struct ib_umem_chunk *chunk;
@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
        int err = 0;
        int write_mtt_size;
 
-       shift = ffs(region->page_size) - 1;
-
        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
+       mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+       if (IS_ERR(mr->umem)) {
+               err = PTR_ERR(mr->umem);
+               goto err;
+       }
+
+       shift = ffs(mr->umem->page_size) - 1;
+
        n = 0;
-       list_for_each_entry(chunk, &region->chunk_list, list)
+       list_for_each_entry(chunk, &mr->umem->chunk_list, list)
                n += chunk->nents;
 
        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
-               goto err;
+               goto err_umem;
        }
 
        pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
+       write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
 
-       list_for_each_entry(chunk, &region->chunk_list, list)
+       list_for_each_entry(chunk, &mr->umem->chunk_list, list)
                for (j = 0; j < chunk->nmap; ++j) {
                        len = sg_dma_len(&chunk->page_list[j]) >> shift;
                        for (k = 0; k < len; ++k) {
                                pages[i++] = 
sg_dma_address(&chunk->page_list[j]) +
-                                       region->page_size * k;
+                                       mr->umem->page_size * k;
                                /*
                                 * Be friendly to write_mtt and pass it chunks
                                 * of appropriate size.
@@ -1071,8 +1082,8 @@ mtt_done:
        if (err)
                goto err_mtt;
 
-       err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
-                            region->length, convert_access(acc), mr);
+       err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+                            convert_access(acc), mr);
 
        if (err)
                goto err_mtt;
@@ -1082,6 +1093,9 @@ mtt_done:
 err_mtt:
        mthca_free_mtt(dev, mr->mtt);
 
+err_umem:
+       ib_umem_release(mr->umem);
+
 err:
        kfree(mr);
        return ERR_PTR(err);
@@ -1090,8 +1104,12 @@ err:
 static int mthca_dereg_mr(struct ib_mr *mr)
 {
        struct mthca_mr *mmr = to_mmr(mr);
+
        mthca_free_mr(to_mdev(mr->device), mmr);
+       if (mmr->umem)
+               ib_umem_release(mmr->umem);
        kfree(mmr);
+
        return 0;
 }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1d266ac..262616c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -73,6 +73,7 @@ struct mthca_mtt;
 
 struct mthca_mr {
        struct ib_mr      ibmr;
+       struct ib_umem   *umem;
        struct mthca_mtt *mtt;
 };
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
new file mode 100644
index 0000000..06307f7
--- /dev/null
+++ b/include/rdma/ib_umem.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2007 Cisco Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_H
+#define IB_UMEM_H
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+struct ib_ucontext;
+
+struct ib_umem {
+       struct ib_ucontext     *context;
+       size_t                  length;
+       int                     offset;
+       int                     page_size;
+       int                     writable;
+       struct list_head        chunk_list;
+};
+
+struct ib_umem_chunk {
+       struct list_head        list;
+       int                     nents;
+       int                     nmap;
+       struct scatterlist      page_list[0];
+};
+
+#ifdef CONFIG_INFINIBAND_USER_MEM
+
+struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+                           size_t size, int access);
+void ib_umem_release(struct ib_umem *umem);
+int ib_umem_page_count(struct ib_umem *umem);
+
+#else /* CONFIG_INFINIBAND_USER_MEM */
+
+#include <linux/err.h>
+
+static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+                                         unsigned long addr, size_t size,
+                                         int access) {
+       return ERR_PTR(-EINVAL);
+}
+static inline void ib_umem_release(struct ib_umem *umem) { }
+static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
+
+#endif /* CONFIG_INFINIBAND_USER_MEM */
+
+#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5342ac6..47cefca 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -5,7 +5,7 @@
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -710,6 +710,7 @@ struct ib_ucontext {
        struct list_head        qp_list;
        struct list_head        srq_list;
        struct list_head        ah_list;
+       int                     closing;
 };
 
 struct ib_uobject {
@@ -723,23 +724,6 @@ struct ib_uobject {
        int                     live;
 };
 
-struct ib_umem {
-       unsigned long           user_base;
-       unsigned long           virt_base;
-       size_t                  length;
-       int                     offset;
-       int                     page_size;
-       int                     writable;
-       struct list_head        chunk_list;
-};
-
-struct ib_umem_chunk {
-       struct list_head        list;
-       int                     nents;
-       int                     nmap;
-       struct scatterlist      page_list[0];
-};
-
 struct ib_udata {
        void __user *inbuf;
        void __user *outbuf;
@@ -752,11 +736,6 @@ struct ib_udata {
         ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -        \
          (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
 
-struct ib_umem_object {
-       struct ib_uobject       uobject;
-       struct ib_umem          umem;
-};
-
 struct ib_pd {
        struct ib_device       *device;
        struct ib_uobject      *uobject;
@@ -1003,7 +982,8 @@ struct ib_device {
                                                  int mr_access_flags,
                                                  u64 *iova_start);
        struct ib_mr *             (*reg_user_mr)(struct ib_pd *pd,
-                                                 struct ib_umem *region,
+                                                 u64 start, u64 length,
+                                                 u64 virt_addr,
                                                  int mr_access_flags,
                                                  struct ib_udata *udata);
        int                        (*query_mr)(struct ib_mr *mr,