Here are three additional patches that were missed in the previous merge.
I have stacked them in order so they are easier to apply (see the numbers
in the patch names).

        Cyrill
>From 8764b338b37524ab1a78aee527318ebee9762487 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <[email protected]>
Date: Thu, 9 Oct 2014 15:27:32 -0700
Subject: [PATCH] mm: use may_adjust_brk helper

ML: 8764b338b37524ab1a78aee527318ebee9762487

https://jira.sw.ru/browse/PSBM-39834

Signed-off-by: Cyrill Gorcunov <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Andrew Vagin <[email protected]>
Cc: Eric W. Biederman <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Acked-by: Serge Hallyn <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Vasiliy Kulikov <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: Michael Kerrisk <[email protected]>
Cc: Julien Tinnes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
---
 kernel/sys.c |   11 ++++-------
 mm/mmap.c    |    7 +++----
 2 files changed, 7 insertions(+), 11 deletions(-)

Index: linux-pcs7.git/kernel/sys.c
===================================================================
--- linux-pcs7.git.orig/kernel/sys.c
+++ linux-pcs7.git/kernel/sys.c
@@ -2279,7 +2279,6 @@ out:
 static int prctl_set_mm(int opt, unsigned long addr,
                        unsigned long arg4, unsigned long arg5)
 {
-       unsigned long rlim = rlimit(RLIMIT_DATA);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int error;
@@ -2330,9 +2329,8 @@ static int prctl_set_mm(int opt, unsigne
                if (addr <= mm->end_data)
                        goto out;
 
-               if (rlim < RLIM_INFINITY &&
-                   (mm->brk - addr) +
-                   (mm->end_data - mm->start_data) > rlim)
+               if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr,
+                                     mm->end_data, mm->start_data))
                        goto out;
 
                mm->start_brk = addr;
@@ -2342,9 +2340,8 @@ static int prctl_set_mm(int opt, unsigne
                if (addr <= mm->end_data)
                        goto out;
 
-               if (rlim < RLIM_INFINITY &&
-                   (addr - mm->start_brk) +
-                   (mm->end_data - mm->start_data) > rlim)
+               if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk,
+                                     mm->end_data, mm->start_data))
                        goto out;
 
                mm->brk = addr;
Index: linux-pcs7.git/mm/mmap.c
===================================================================
--- linux-pcs7.git.orig/mm/mmap.c
+++ linux-pcs7.git/mm/mmap.c
@@ -272,7 +272,7 @@ static unsigned long do_brk(unsigned lon
 
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
-       unsigned long rlim, retval;
+       unsigned long retval;
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
        unsigned long min_brk;
@@ -302,9 +302,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
         * segment grow beyond its set limit the in case where the limit is
         * not page aligned -Ram Gupta
         */
-       rlim = rlimit(RLIMIT_DATA);
-       if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
-                       (mm->end_data - mm->start_data) > rlim)
+       if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
+                             mm->end_data, mm->start_data))
                goto out;
 
        newbrk = PAGE_ALIGN(brk);
>From 4a00e9df293d010acbea118b9521e08cb85016c6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <[email protected]>
Date: Thu, 25 Jun 2015 15:00:51 -0700
Subject: [PATCH] prctl: more prctl(PR_SET_MM_*) checks

ML: 4a00e9df293d010acbea118b9521e08cb85016c6

https://jira.sw.ru/browse/PSBM-39834

From: Alexey Dobriyan <[email protected]>

Individual prctl(PR_SET_MM_*) calls do some checking to maintain a
consistent view of mm->arg_start et al fields, but not enough.  In
particular PR_SET_MM_ARG_START/PR_SET_MM_ARG_END/PR_SET_MM_ENV_START/
PR_SET_MM_ENV_END only check that the address lies in an existing VMA,
but don't check that the start address is lower than the end address _at
all_.

Consolidate all consistency checks, so there will be no difference in
the future between PR_SET_MM_MAP and individual PR_SET_MM_* calls.

The program below makes both ARGV and ENVP areas be reversed.  It makes
/proc/$PID/cmdline show garbage (it doesn't oops by luck).

/*
 * Reproducer: reverse the ARGV and ENVP areas of the calling process by
 * setting each start address above its end address inside one PROT_NONE
 * page, then sleep so that /proc/$PID/cmdline can be inspected.
 *
 * prctl(PR_SET_MM, ...) is refused with -EPERM without CAP_SYS_RESOURCE.
 */
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>

enum {PAGE_SIZE=4096};

int main(void)
{
        void *p;

        p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        /* Without a mapping the addresses below would come from MAP_FAILED. */
        if (p == MAP_FAILED)
                return 1;

#define PR_SET_MM               35
#define PR_SET_MM_ARG_START     8
#define PR_SET_MM_ARG_END       9
#define PR_SET_MM_ENV_START     10
#define PR_SET_MM_ENV_END       11
        /* Start is the last byte of the page, end is its first byte. */
        prctl(PR_SET_MM, PR_SET_MM_ARG_START,
              (unsigned long)p + PAGE_SIZE - 1, 0, 0);
        prctl(PR_SET_MM, PR_SET_MM_ARG_END,   (unsigned long)p, 0, 0);
        prctl(PR_SET_MM, PR_SET_MM_ENV_START,
              (unsigned long)p + PAGE_SIZE - 1, 0, 0);
        prctl(PR_SET_MM, PR_SET_MM_ENV_END,   (unsigned long)p, 0, 0);

        /* Keep the process alive so its /proc entries stay readable. */
        pause();
        return 0;
}

[[email protected]: tidy code, tweak comment]
Signed-off-by: Alexey Dobriyan <[email protected]>
Acked-by: Cyrill Gorcunov <[email protected]>
Cc: Jarod Wilson <[email protected]>
Cc: Jan Stancek <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
---
 kernel/sys.c |  158 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 91 insertions(+), 67 deletions(-)

Index: linux-pcs7.git/kernel/sys.c
===================================================================
--- linux-pcs7.git.orig/kernel/sys.c
+++ linux-pcs7.git/kernel/sys.c
@@ -2095,7 +2095,6 @@ exit:
        return err;
 }
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
 /*
  * WARNING: we don't require any capability here so be very careful
  * in what is allowed for modification from userspace.
@@ -2191,6 +2190,7 @@ out:
        return error;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
 {
        struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
@@ -2276,10 +2276,41 @@ out:
 }
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
+static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
+                         unsigned long len)
+{
+       /*
+        * This doesn't move the auxiliary vector itself since it's pinned to
+        * mm_struct, but it permits filling the vector with new values.  It's
+        * up to the caller to provide sane values here, otherwise userspace
+        * tools which use this vector might be unhappy.
+        */
+       unsigned long user_auxv[AT_VECTOR_SIZE];
+
+       if (len > sizeof(user_auxv))
+               return -EINVAL;
+
+       if (copy_from_user(user_auxv, (const void __user *)addr, len))
+               return -EFAULT;
+
+       /* Make sure the last entry is always AT_NULL */
+       user_auxv[AT_VECTOR_SIZE - 2] = 0;
+       user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+       BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+       task_lock(current);
+       memcpy(mm->saved_auxv, user_auxv, len);
+       task_unlock(current);
+
+       return 0;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
                        unsigned long arg4, unsigned long arg5)
 {
        struct mm_struct *mm = current->mm;
+       struct prctl_mm_map prctl_map;
        struct vm_area_struct *vma;
        int error;
 
@@ -2303,6 +2334,9 @@ static int prctl_set_mm(int opt, unsigne
                return error;
        }
 
+       if (opt == PR_SET_MM_AUXV)
+               return prctl_set_auxv(mm, addr, arg4);
+
        if (addr >= TASK_SIZE || addr < mmap_min_addr)
                return -EINVAL;
 
@@ -2311,42 +2345,64 @@ static int prctl_set_mm(int opt, unsigne
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, addr);
 
+       prctl_map.start_code    = mm->start_code;
+       prctl_map.end_code      = mm->end_code;
+       prctl_map.start_data    = mm->start_data;
+       prctl_map.end_data      = mm->end_data;
+       prctl_map.start_brk     = mm->start_brk;
+       prctl_map.brk           = mm->brk;
+       prctl_map.start_stack   = mm->start_stack;
+       prctl_map.arg_start     = mm->arg_start;
+       prctl_map.arg_end       = mm->arg_end;
+       prctl_map.env_start     = mm->env_start;
+       prctl_map.env_end       = mm->env_end;
+       prctl_map.auxv          = NULL;
+       prctl_map.auxv_size     = 0;
+       prctl_map.exe_fd        = -1;
+
        switch (opt) {
        case PR_SET_MM_START_CODE:
-               mm->start_code = addr;
+               prctl_map.start_code = addr;
                break;
        case PR_SET_MM_END_CODE:
-               mm->end_code = addr;
+               prctl_map.end_code = addr;
                break;
        case PR_SET_MM_START_DATA:
-               mm->start_data = addr;
+               prctl_map.start_data = addr;
                break;
        case PR_SET_MM_END_DATA:
-               mm->end_data = addr;
+               prctl_map.end_data = addr;
+               break;
+       case PR_SET_MM_START_STACK:
+               prctl_map.start_stack = addr;
                break;
-
        case PR_SET_MM_START_BRK:
-               if (addr <= mm->end_data)
-                       goto out;
-
-               if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr,
-                                     mm->end_data, mm->start_data))
-                       goto out;
-
-               mm->start_brk = addr;
+               prctl_map.start_brk = addr;
                break;
-
        case PR_SET_MM_BRK:
-               if (addr <= mm->end_data)
-                       goto out;
-
-               if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk,
-                                     mm->end_data, mm->start_data))
-                       goto out;
-
-               mm->brk = addr;
+               prctl_map.brk = addr;
+               break;
+       case PR_SET_MM_ARG_START:
+               prctl_map.arg_start = addr;
+               break;
+       case PR_SET_MM_ARG_END:
+               prctl_map.arg_end = addr;
+               break;
+       case PR_SET_MM_ENV_START:
+               prctl_map.env_start = addr;
                break;
+       case PR_SET_MM_ENV_END:
+               prctl_map.env_end = addr;
+               break;
+       default:
+               goto out;
+       }
 
+       error = validate_prctl_map(&prctl_map);
+       if (error)
+               goto out;
+
+       switch (opt) {
        /*
         * If command line arguments and environment
         * are placed somewhere else on stack, we can
@@ -2363,52 +2419,20 @@ static int prctl_set_mm(int opt, unsigne
                        error = -EFAULT;
                        goto out;
                }
-               if (opt == PR_SET_MM_START_STACK)
-                       mm->start_stack = addr;
-               else if (opt == PR_SET_MM_ARG_START)
-                       mm->arg_start = addr;
-               else if (opt == PR_SET_MM_ARG_END)
-                       mm->arg_end = addr;
-               else if (opt == PR_SET_MM_ENV_START)
-                       mm->env_start = addr;
-               else if (opt == PR_SET_MM_ENV_END)
-                       mm->env_end = addr;
-               break;
-
-       /*
-        * This doesn't move auxiliary vector itself
-        * since it's pinned to mm_struct, but allow
-        * to fill vector with new values. It's up
-        * to a caller to provide sane values here
-        * otherwise user space tools which use this
-        * vector might be unhappy.
-        */
-       case PR_SET_MM_AUXV: {
-               unsigned long user_auxv[AT_VECTOR_SIZE];
-
-               if (arg4 > sizeof(user_auxv))
-                       goto out;
-               up_read(&mm->mmap_sem);
-
-               if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
-                       return -EFAULT;
-
-               /* Make sure the last entry is always AT_NULL */
-               user_auxv[AT_VECTOR_SIZE - 2] = 0;
-               user_auxv[AT_VECTOR_SIZE - 1] = 0;
-
-               BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
-
-               task_lock(current);
-               memcpy(mm->saved_auxv, user_auxv, arg4);
-               task_unlock(current);
-
-               return 0;
-       }
-       default:
-               goto out;
        }
 
+       mm->start_code  = prctl_map.start_code;
+       mm->end_code    = prctl_map.end_code;
+       mm->start_data  = prctl_map.start_data;
+       mm->end_data    = prctl_map.end_data;
+       mm->start_brk   = prctl_map.start_brk;
+       mm->brk         = prctl_map.brk;
+       mm->start_stack = prctl_map.start_stack;
+       mm->arg_start   = prctl_map.arg_start;
+       mm->arg_end     = prctl_map.arg_end;
+       mm->env_start   = prctl_map.env_start;
+       mm->env_end     = prctl_map.env_end;
+
        error = 0;
 out:
        up_read(&mm->mmap_sem);
From: Cyrill Gorcunov <[email protected]>
Subject: [PATCH] prctl: avoid using mmap_sem for exe_file serialization

ML: 6e399cd144d8500ffb5d40fa6848890e2580a80a

https://jira.sw.ru/browse/PSBM-39834

From: Davidlohr Bueso <[email protected]>

Oleg cleverly suggested using xchg() to set the new mm->exe_file instead
of calling set_mm_exe_file() which requires some form of serialization --
mmap_sem in this case.  For archs that do not have atomic rmw instructions
we still fallback to a spinlock alternative, so this should always be
safe.  As such, we only need the mmap_sem for looking up the backing
vm_file, which can be done sharing the lock.  Naturally, this means we
need to manually deal with both the new and old file reference counting,
and we need not worry about the MMF_EXE_FILE_CHANGED bits, which can
probably be deleted in the future anyway.

Signed-off-by: Davidlohr Bueso <[email protected]>
Suggested-by: Oleg Nesterov <[email protected]>
Acked-by: Oleg Nesterov <[email protected]>
Reviewed-by: Konstantin Khlebnikov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
---
 kernel/sys.c |   47 ++++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 19 deletions(-)

Index: linux-pcs7.git/kernel/sys.c
===================================================================
--- linux-pcs7.git.orig/kernel/sys.c
+++ linux-pcs7.git/kernel/sys.c
@@ -2036,14 +2036,13 @@ SYSCALL_DEFINE1(umask, int, mask)
        return mask;
 }
 
-static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
        struct fd exe;
+       struct file *old_exe, *exe_file;
        struct inode *inode;
        int err;
 
-       VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
        exe = fdget(fd);
        if (!exe.file)
                return -EBADF;
@@ -2067,15 +2066,22 @@ static int prctl_set_mm_exe_file_locked(
        /*
         * Forbid mm->exe_file change if old file still mapped.
         */
+       exe_file = get_mm_exe_file(mm);
        err = -EBUSY;
-       if (mm->exe_file) {
+       if (exe_file) {
                struct vm_area_struct *vma;
 
-               for (vma = mm->mmap; vma; vma = vma->vm_next)
-                       if (vma->vm_file &&
-                           path_equal(&vma->vm_file->f_path,
-                                      &mm->exe_file->f_path))
-                               goto exit;
+               down_read(&mm->mmap_sem);
+               for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                       if (!vma->vm_file)
+                               continue;
+                       if (path_equal(&vma->vm_file->f_path,
+                                      &exe_file->f_path))
+                               goto exit_err;
+               }
+
+               up_read(&mm->mmap_sem);
+               fput(exe_file);
        }
 
        /*
@@ -2089,10 +2095,18 @@ static int prctl_set_mm_exe_file_locked(
                goto exit;
 
        err = 0;
-       set_mm_exe_file(mm, exe.file);  /* this grabs a reference to exe.file */
+       /* set the new file, lockless */
+       get_file(exe.file);
+       old_exe = xchg(&mm->exe_file, exe.file);
+       if (old_exe)
+               fput(old_exe);
 exit:
        fdput(exe);
        return err;
+exit_err:
+       up_read(&mm->mmap_sem);
+       fput(exe_file);
+       goto exit;
 }
 
 /*
@@ -2227,10 +2241,9 @@ static int prctl_set_mm_map(int opt, con
                user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
        }
 
-       down_write(&mm->mmap_sem);
        if (prctl_map.exe_fd != (u32)-1)
-               error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
-       downgrade_write(&mm->mmap_sem);
+               error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
+       down_read(&mm->mmap_sem);
        if (error)
                goto out;
 
@@ -2327,12 +2340,8 @@ static int prctl_set_mm(int opt, unsigne
        if (!capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
-       if (opt == PR_SET_MM_EXE_FILE) {
-               down_write(&mm->mmap_sem);
-               error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
-               up_write(&mm->mmap_sem);
-               return error;
-       }
+       if (opt == PR_SET_MM_EXE_FILE)
+               return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
        if (opt == PR_SET_MM_AUXV)
                return prctl_set_auxv(mm, addr, arg4);
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to