The user has to mmap the vmalloc'ed user_header and user_index pointers in
order to consume events from userspace.  Support mapping with the possibility
to mremap() in the future, i.e. the vma does not have VM_DONTEXPAND set.

The user mmaps two pointers, header and index, so that both can be expanded
by calling mremap().

Expansion is supported by the fault callback, where each page is mapped
after all appropriate size checks.

Signed-off-by: Roman Penyaev <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: Al Viro <[email protected]>
Cc: "Paul E. McKenney" <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrea Parri <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
 fs/eventpoll.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5de640fcf28b..2849b238f80b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1388,11 +1388,96 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 }
 #endif
 
+/* Map the faulting page of the header (vm_pgoff == 0) or the index ring. */
+static vm_fault_t ep_eventpoll_fault(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       struct eventpoll *ep = vma->vm_file->private_data;
+       size_t off = vmf->address - vma->vm_start;
+       vm_fault_t ret = VM_FAULT_SIGBUS;
+       int err;
+
+       mutex_lock(&ep->mtx);
+       if (vma->vm_pgoff) {
+               /* NOTE(review): off is in bytes; confirm user_index ptr math */
+               if ((off + PAGE_SIZE) > ep->index_length)
+                       goto out_unlock;
+
+               err = remap_vmalloc_range_partial(vma, vmf->address,
+                                                 ep->user_index + off, PAGE_SIZE);
+       } else {
+               /* Header: page must lie fully inside header_length */
+               if ((off + PAGE_SIZE) > ep->header_length)
+                       goto out_unlock;
+
+               err = remap_vmalloc_range_partial(vma, vmf->address,
+                                                 ep->user_header + off, PAGE_SIZE);
+       }
+       if (likely(!err)) {
+               /* Mapped: keep the vma expandable so mremap() can grow it */
+               vma->vm_flags &= ~VM_DONTEXPAND;
+               ret = VM_FAULT_NOPAGE;
+       }
+out_unlock:
+       mutex_unlock(&ep->mtx);
+
+       return ret;
+}
+
+static const struct vm_operations_struct eventpoll_vm_ops = {
+       .fault = ep_eventpoll_fault, /* maps header/index pages on demand */
+};
+
+/*
+ * mmap() the user header (vm_pgoff == 0) or the index ring (vm_pgoff pointing
+ * right behind the header).  Returns 0, -EOPNOTSUPP if the epfd is not polled
+ * from userspace, -ENXIO on a bad offset or size, or the remap error.
+ */
+static int ep_eventpoll_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+       struct eventpoll *ep = vma->vm_file->private_data;
+       size_t size = vma->vm_end - vma->vm_start;
+       int rc = -ENXIO;
+
+       if (!ep_polled_by_user(ep))
+               /* ENOTSUPP is kernel-internal and must not reach userspace */
+               return -EOPNOTSUPP;
+
+       mutex_lock(&ep->mtx);
+       if (!vma->vm_pgoff) {
+               if (size > ep->header_length)
+                       goto unlock_and_out;
+               rc = remap_vmalloc_range_partial(vma, vma->vm_start,
+                                                ep->user_header, size);
+       } else {
+               /*
+                * vm_pgoff is used *only* as an indication that the index
+                * ring is mapped: it must point exactly after the header.
+                */
+               if (ep->header_length != (vma->vm_pgoff << PAGE_SHIFT))
+                       goto unlock_and_out;
+               if (size > ep->index_length)
+                       goto unlock_and_out;
+               rc = remap_vmalloc_range_partial(vma, vma->vm_start,
+                                                ep->user_index, size);
+       }
+       if (likely(!rc)) {
+               /* Leave the vma expandable so it can be grown by mremap() */
+               vma->vm_flags &= ~VM_DONTEXPAND;
+               vma->vm_ops = &eventpoll_vm_ops;
+       }
+unlock_and_out:
+       mutex_unlock(&ep->mtx);
+
+       return rc;
+}
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
 #ifdef CONFIG_PROC_FS
        .show_fdinfo    = ep_show_fdinfo,
 #endif
+       .mmap           = ep_eventpoll_mmap,
        .release        = ep_eventpoll_release,
        .poll           = ep_eventpoll_poll,
        .llseek         = noop_llseek,
-- 
2.19.1

Reply via email to