Hello,

Summary:
If I call ibv_close_device() on the context with unregistered memory
process hangs in D state.

This is what happens:
ibv_close_device() close cmd_fd and then calls free_context().
free_context() calls munmap to unmap doorbell registers.
In kernel sys_munmap gets mm->mmap_sem semaphore and calls do_munmap.
do_munmap is the last user of the file so it calls release method of
the file (ib_uverbs_close() in our case). ib_uverbs_close() calls
ib_dealloc_ucontext(). ib_dealloc_ucontext() notices that there is 
unregistered memory on the file and calls ib_umem_release(). And there
we are trying to acquire mm->mmap_sem on more time.

One way to solve this problem is to call munmap before close in
ibv_close_device() but this will not stop malicious user so this is
not enough.

In attached patch I use down_write_trylock() instead of down_write()
in ib_umem_release(). If semaphore is already locked we will not update
locked_vm statistics. This way malicious user can only cause harm to
itself. 

The solution is not ideal since if some other process holds mmap_sem
(for instance do 'cat /proc/pid/maps') we will not be able to update
locked_vm counter but the chances this happening are close to zero.


Index: trunk/src/userspace/libibverbs/src/device.c
===================================================================
--- trunk/src/userspace/libibverbs/src/device.c (revision 2715)
+++ trunk/src/userspace/libibverbs/src/device.c (working copy)
@@ -121,10 +121,9 @@
        close(context->async_fd);
        for (i = 0; i < context->num_comp; ++i)
                close(context->cq_fd[i]);
+       context->device->ops.free_context(context);
        close(context->cmd_fd);
 
-       context->device->ops.free_context(context);
-
        return 0;
 }
 
Index: trunk/src/linux-kernel/infiniband/core/uverbs_mem.c
===================================================================
--- trunk/src/linux-kernel/infiniband/core/uverbs_mem.c (revision 2715)
+++ trunk/src/linux-kernel/infiniband/core/uverbs_mem.c (working copy)
@@ -163,18 +163,19 @@
 void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
 {
        struct mm_struct *mm;
+       int semlocked = 0;
 
        mm = get_task_mm(current);
 
-       if (mm) {
-               down_write(&mm->mmap_sem);
+       if (mm && (semlocked = down_write_trylock (&mm->mmap_sem))) {
                mm->locked_vm -= PAGE_ALIGN(umem->length + umem->offset) >> 
PAGE_SHIFT;
        }
 
        __ib_umem_release(dev, umem, 1);
 
        if (mm) {
-               up_write(&mm->mmap_sem);
+               if (semlocked)
+                       up_write(&mm->mmap_sem);
                mmput(mm);
        }
 }
--
                        Gleb.
Index: trunk/src/userspace/libibverbs/src/device.c
===================================================================
--- trunk/src/userspace/libibverbs/src/device.c (revision 2715)
+++ trunk/src/userspace/libibverbs/src/device.c (working copy)
@@ -121,10 +121,9 @@
        close(context->async_fd);
        for (i = 0; i < context->num_comp; ++i)
                close(context->cq_fd[i]);
+       context->device->ops.free_context(context);
        close(context->cmd_fd);
 
-       context->device->ops.free_context(context);
-
        return 0;
 }
 
Index: trunk/src/linux-kernel/infiniband/core/uverbs_mem.c
===================================================================
--- trunk/src/linux-kernel/infiniband/core/uverbs_mem.c (revision 2715)
+++ trunk/src/linux-kernel/infiniband/core/uverbs_mem.c (working copy)
@@ -163,18 +163,19 @@
 void ib_umem_release(struct ib_device *dev, struct ib_umem *umem)
 {
        struct mm_struct *mm;
+       int semlocked = 0;
 
        mm = get_task_mm(current);
 
-       if (mm) {
-               down_write(&mm->mmap_sem);
+       if (mm && (semlocked = down_write_trylock (&mm->mmap_sem))) {
                mm->locked_vm -= PAGE_ALIGN(umem->length + umem->offset) >> 
PAGE_SHIFT;
        }
 
        __ib_umem_release(dev, umem, 1);
 
        if (mm) {
-               up_write(&mm->mmap_sem);
+               if (semlocked)
+                       up_write(&mm->mmap_sem);
                mmput(mm);
        }
 }
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to