* KAMEZAWA Hiroyuki <[EMAIL PROTECTED]> [2008-06-19 19:22:27]:

> On Thu, 19 Jun 2008 12:24:29 +0900
> KAMEZAWA Hiroyuki <[EMAIL PROTECTED]> wrote:
> 
> > On Thu, 19 Jun 2008 08:43:43 +0530
> > Balbir Singh <[EMAIL PROTECTED]> wrote:
> > 
> > >
> > > > I think the charge of the new group goes to minus. right ?
> > > > (and old group's charge never goes down.)
> > > > I don't think this is "no problem".
> > > > 
> > > > What kind of patch is necessary to fix this ?
> > > > task_attach() should be able to fail in future ?
> > > > 
> > > > I'm sorry if I misunderstand something or this is already in TODO list.
> > > > 
> > > 
> > > It's already on the TODO list. Thanks for keeping me reminded about it.
> > > 
> > Okay, I'm looking foward to see how can_attach and roll-back(if necessary)
> > is implemnted.
> > As you know, I'm interested in how to handle failure of task move.
> > 
> One more thing...
> Now, charge is done at
> 
>  - vm is inserted (special case?)
>  - vm is expanded (mmap is called, stack growth...)
> 
> And uncharge is done at
>  - vm is removed (success of munmap)
>  - exit_mm is called (exit of process)
> 
> But it seems charging at may_expand_vm() is not good.
> The mmap can fail after may_expand_vm() because of various reason,
> but charge is already done at may_expand_vm()....and no roll-back.
> 
Hi, Kamezawa-San,

The patch I have below addresses the issue. FYI: I do see some
variation in memrlimit.usage_in_bytes after running ls, but it's not
that much. I verified that the patch behaves correctly, by doing
a cat of memrlimit.usage_in_bytes from another shell (not associated
with test group) and then compared that value to /proc/$$/statm
($$ being the pid of the task belonging to the test group).

Could you please review/test the patch below? If it works OK, I'll
request Andrew to pick it up.

Description
-----------
memrlimit cgroup does not handle error cases after may_expand_vm(). This
BUG was reported by Kamezawa, with the test case below to reproduce it

[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes
71921664
[EMAIL PROTECTED] kamezawa]# ulimit -s 3
[EMAIL PROTECTED] kamezawa]# ls
Killed
[EMAIL PROTECTED] kamezawa]# ls
Killed
[EMAIL PROTECTED] kamezawa]# ls
Killed
[EMAIL PROTECTED] kamezawa]# ls
Killed
[EMAIL PROTECTED] kamezawa]# ls
Killed
[EMAIL PROTECTED] kamezawa]# ulimit -s unlimited
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/test/memrlimit.usage_in_bytes
72368128
[EMAIL PROTECTED] kamezawa]#

This patch adds better handling support to fix the reported problem.

Reported-By: [EMAIL PROTECTED]
Signed-off-by: Balbir Singh <[EMAIL PROTECTED]>
---

 mm/mmap.c   |   36 +++++++++++++++++++++++++-----------
 mm/mremap.c |    6 ++++++
 2 files changed, 31 insertions(+), 11 deletions(-)

diff -puN mm/mmap.c~memrlimit-cgroup-add-better-error-handling mm/mmap.c
--- linux-2.6.26-rc5/mm/mmap.c~memrlimit-cgroup-add-better-error-handling       
2008-06-19 21:12:46.000000000 +0530
+++ linux-2.6.26-rc5-balbir/mm/mmap.c   2008-06-19 21:39:45.000000000 +0530
@@ -1123,7 +1123,7 @@ munmap_back:
                         */
                        charged = len >> PAGE_SHIFT;
                        if (security_vm_enough_memory(charged))
-                               return -ENOMEM;
+                               goto undo_charge;
                        vm_flags |= VM_ACCOUNT;
                }
        }
@@ -1245,6 +1245,8 @@ free_vma:
 unacct_error:
        if (charged)
                vm_unacct_memory(charged);
+undo_charge:
+       memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT);
        return error;
 }
 
@@ -1540,14 +1542,15 @@ static int acct_stack_growth(struct vm_a
        struct mm_struct *mm = vma->vm_mm;
        struct rlimit *rlim = current->signal->rlim;
        unsigned long new_start;
+       int ret = -ENOMEM;
 
        /* address space limit tests */
        if (!may_expand_vm(mm, grow))
-               return -ENOMEM;
+               goto out;
 
        /* Stack limit test */
        if (size > rlim[RLIMIT_STACK].rlim_cur)
-               return -ENOMEM;
+               goto undo_charge;
 
        /* mlock limit tests */
        if (vma->vm_flags & VM_LOCKED) {
@@ -1556,21 +1559,23 @@ static int acct_stack_growth(struct vm_a
                locked = mm->locked_vm + grow;
                limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
                if (locked > limit && !capable(CAP_IPC_LOCK))
-                       return -ENOMEM;
+                       goto undo_charge;
        }
 
        /* Check to ensure the stack will not grow into a hugetlb-only region */
        new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
                        vma->vm_end - size;
-       if (is_hugepage_only_range(vma->vm_mm, new_start, size))
-               return -EFAULT;
+       if (is_hugepage_only_range(vma->vm_mm, new_start, size)) {
+               ret = -EFAULT;
+               goto undo_charge;
+       }
 
        /*
         * Overcommit..  This must be the final test, as it will
         * update security statistics.
         */
        if (security_vm_enough_memory(grow))
-               return -ENOMEM;
+               goto undo_charge;
 
        /* Ok, everything looks good - let it rip */
        mm->total_vm += grow;
@@ -1578,6 +1583,11 @@ static int acct_stack_growth(struct vm_a
                mm->locked_vm += grow;
        vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
+undo_charge:
+       /* Undo memrlimit charge */
+       memrlimit_cgroup_uncharge_as(mm, grow);
+out:
+       return ret;
 }
 
 #if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
@@ -1982,6 +1992,7 @@ unsigned long do_brk(unsigned long addr,
        struct rb_node ** rb_link, * rb_parent;
        pgoff_t pgoff = addr >> PAGE_SHIFT;
        int error;
+       int ret = -ENOMEM;
 
        len = PAGE_ALIGN(len);
        if (!len)
@@ -2035,13 +2046,13 @@ unsigned long do_brk(unsigned long addr,
 
        /* Check against address space limits *after* clearing old maps... */
        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
-               return -ENOMEM;
+               return ret;
 
        if (mm->map_count > sysctl_max_map_count)
-               return -ENOMEM;
+               goto undo_charge;
 
        if (security_vm_enough_memory(len >> PAGE_SHIFT))
-               return -ENOMEM;
+               goto undo_charge;
 
        /* Can we just expand an old private anonymous mapping? */
        vma = vma_merge(mm, prev, addr, addr + len, flags,
@@ -2055,7 +2066,7 @@ unsigned long do_brk(unsigned long addr,
        vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
        if (!vma) {
                vm_unacct_memory(len >> PAGE_SHIFT);
-               return -ENOMEM;
+               goto undo_charge;
        }
 
        vma->vm_mm = mm;
@@ -2073,6 +2084,9 @@ out:
                        mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
        }
        return addr;
+undo_charge:
+       memrlimit_cgroup_uncharge_as(mm, len >> PAGE_SHIFT);
+       return ret;
 }
 
 EXPORT_SYMBOL(do_brk);
diff -puN mm/mremap.c~memrlimit-cgroup-add-better-error-handling mm/mremap.c
--- linux-2.6.26-rc5/mm/mremap.c~memrlimit-cgroup-add-better-error-handling     
2008-06-19 21:12:46.000000000 +0530
+++ linux-2.6.26-rc5-balbir/mm/mremap.c 2008-06-19 22:00:02.000000000 +0530
@@ -18,6 +18,7 @@
 #include <linux/highmem.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/memrlimitcgroup.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -256,6 +257,7 @@ unsigned long do_mremap(unsigned long ad
        struct vm_area_struct *vma;
        unsigned long ret = -EINVAL;
        unsigned long charged = 0;
+       int vm_expanded = 0;
 
        if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
                goto out;
@@ -349,6 +351,7 @@ unsigned long do_mremap(unsigned long ad
                goto out;
        }
 
+       vm_expanded = 1;
        if (vma->vm_flags & VM_ACCOUNT) {
                charged = (new_len - old_len) >> PAGE_SHIFT;
                if (security_vm_enough_memory(charged))
@@ -411,6 +414,9 @@ out:
        if (ret & ~PAGE_MASK)
                vm_unacct_memory(charged);
 out_nc:
+       if (vm_expanded)
+               memrlimit_cgroup_uncharge_as(mm,
+                               (new_len - old_len) >> PAGE_SHIFT);
        return ret;
 }
 
_
-- 
        Warm Regards,
        Balbir Singh
        Linux Technology Center
        IBM, ISTL
_______________________________________________
Containers mailing list
[EMAIL PROTECTED]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel

Reply via email to