KAMEZAWA Hiroyuki wrote:
> An experimental patch.
> 
> This patch adds an interface to uncharge all pages in a memory cgroup if
> no tasks are in it. With this, a user can remove the cgroup by 'rmdir'.
> 
> To uncharge all remaining pages in a cgroup, echo -n 0 to memory.control_type.
> (Just for testing; please advise me about a better interface.
>  I think having 'rmdir' do this automatically is one way.)
> 
> Following is my session:
> ==
>    [EMAIL PROTECTED] kamezawa]# mkdir /opt/cgroup/group_A
>    [EMAIL PROTECTED] kamezawa]# bash
>    [EMAIL PROTECTED] kamezawa]# echo $$ > /opt/cgroup/group_A/tasks
>    [EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
>    122880
>    [EMAIL PROTECTED] kamezawa]# cp ./tmpfile tmpfile2
>    [EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
>    8597504
>    [EMAIL PROTECTED] kamezawa]# exit
>    exit
>    [EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
>    8454144
>    [EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/tasks
> (*)[EMAIL PROTECTED] kamezawa]# echo -n 0 > /opt/cgroup/group_A/memory.control_type
>    [EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
>    0
>    [EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/tasks
>    [EMAIL PROTECTED] kamezawa]# rmdir /opt/cgroup/group_A
>    [EMAIL PROTECTED] kamezawa]# exit
> ==
> In the above case, a user can't remove group_A because of 8454144 bytes of
> page cache. By (*), all page caches are uncharged.
> 
> Uncharged pages will be charged again if some process accesses them later
> (or they will be dropped by kswapd).
> 
> p.s.
>  Will extra consideration be needed for currently mapped pages
>  (recharging them immediately)?
> 
> Signed-off-by: KAMEZAWA Hiroyuki <[EMAIL PROTECTED]>
> 
> 
> 
>  mm/memcontrol.c |   93 +++++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 83 insertions(+), 10 deletions(-)
> 
> Index: linux-2.6.23-rc8-mm1/mm/memcontrol.c
> ===================================================================
> --- linux-2.6.23-rc8-mm1.orig/mm/memcontrol.c
> +++ linux-2.6.23-rc8-mm1/mm/memcontrol.c
> @@ -424,17 +424,80 @@ void mem_cgroup_uncharge(struct page_cgr
>       if (atomic_dec_and_test(&pc->ref_cnt)) {
>               page = pc->page;
>               lock_page_cgroup(page);
> -             mem = pc->mem_cgroup;
> -             css_put(&mem->css);
> -             page_assign_page_cgroup(page, NULL);
> -             unlock_page_cgroup(page);
> -             res_counter_uncharge(&mem->res, PAGE_SIZE);
> +             pc = page_get_page_cgroup(page);
> +             if (pc) {
> +                     mem = pc->mem_cgroup;
> +                     css_put(&mem->css);
> +                     page_assign_page_cgroup(page, NULL);
> +                     unlock_page_cgroup(page);
> +                     res_counter_uncharge(&mem->res, PAGE_SIZE);
> +                     spin_lock_irqsave(&mem->lru_lock, flags);
> +                     list_del_init(&pc->lru);
> +                     spin_unlock_irqrestore(&mem->lru_lock, flags);
> +                     kfree(pc);
> +             } else
> +                     unlock_page_cgroup(page);
> +     }
> +}

This looks like a bug fix in mem_cgroup_uncharge(). Did you hit a
race with a simultaneous free? Could we split this out into a
separate patch, please?
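
If I am reading the new code right, the window is a race against the
forced-uncharge path added below, which frees pc without going through
ref_cnt. Roughly (my reconstruction, not from the changelog):

	/*
	 * CPU0: mem_cgroup_uncharge()
	 *       atomic_dec_and_test(&pc->ref_cnt) -> true
	 * CPU1: mem_cgroup_forced_uncharge_list()
	 *       lock_page_cgroup(page)
	 *       page_assign_page_cgroup(page, NULL)
	 *       unlock_page_cgroup(page)
	 *       kfree(pc)            <- CPU0's pc is now stale
	 * CPU0: lock_page_cgroup(page)
	 *       page_get_page_cgroup(page) -> NULL, back off
	 *
	 * Without re-reading pc under lock_page_cgroup(), CPU0 would
	 * dereference freed memory and uncharge twice.
	 */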

> +/*
> + * Forcibly uncharge pages. If the page is accessed again, it will be
> + * recharged by another cgroup.
> + *
> + * mem->lru_lock guarantees no-race with mem_cgroup_isolate_pages()
> + * lock_page_cgroup() -> pc = page_get_page_cgroup() guarantees no-race with
> + * mem_cgroup_uncharge().
> + */
> 
> -             spin_lock_irqsave(&mem->lru_lock, flags);
> -             list_del_init(&pc->lru);
> -             spin_unlock_irqrestore(&mem->lru_lock, flags);
> -             kfree(pc);
> +static void
> +mem_cgroup_forced_uncharge_list(struct mem_cgroup *mem, struct list_head *src)
> +{
> +     struct page_cgroup *pc;
> +     struct page *page;
> +     int count = SWAP_CLUSTER_MAX;
> +
> +     spin_lock(&mem->lru_lock);

I think this should be spin_lock_irqsave.
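
Concretely (a sketch only; flags would be a new local in
mem_cgroup_forced_uncharge_list(), and both unlock sites need the
matching irqrestore):

	unsigned long flags;

	spin_lock_irqsave(&mem->lru_lock, flags);
	while (!list_empty(src)) {
		...
		if (--count == 0) {
			spin_unlock_irqrestore(&mem->lru_lock, flags);
			cond_resched();
			spin_lock_irqsave(&mem->lru_lock, flags);
			count = SWAP_CLUSTER_MAX;
		}
	}
	spin_unlock_irqrestore(&mem->lru_lock, flags);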

> +     while (!list_empty(src)) {
> +             pc = list_entry(src->prev, struct page_cgroup, lru);
> +             /* When we uncharge page, pc->page is not cleared before
> +                pc is removed from LRU. But, page->pc will be cleared. */

Comment style needs fixing
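
For reference, the usual multi-line style from Documentation/CodingStyle:

	/*
	 * When we uncharge a page, pc->page is not cleared before pc is
	 * removed from the LRU, but page->page_cgroup is.
	 */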

> +             page = pc->page;
> +             lock_page_cgroup(page);
> +             pc = page_get_page_cgroup(page);
> +             /* check race */
> +             if (pc) {
> +                     css_put(&mem->css);
> +                     page_assign_page_cgroup(page, NULL);
> +                     unlock_page_cgroup(page);
> +                     res_counter_uncharge(&mem->res, PAGE_SIZE);
> +                     list_del_init(&pc->lru);
> +                     kfree(pc);
> +             } else
> +                     unlock_page_cgroup(page);
> +             if (--count == 0) {
> +                     spin_unlock(&mem->lru_lock);
> +                     cond_resched();
> +                     spin_lock(&mem->lru_lock);
> +                     count = SWAP_CLUSTER_MAX;
> +             }
> +     }
> +     spin_unlock(&mem->lru_lock);
> +}

The forced uncharge list is one way of doing it; the other way is to
use shrink_all_memory()-style logic. For now, I think this should be
fine.
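
To sketch what the shrink_all_memory()-style variant might look like
(entirely hypothetical; mem_cgroup_shrink_usage() is an invented name
standing in for a per-cgroup reclaim entry point we do not have yet):

	/*
	 * Hypothetical alternative: push pages out through reclaim
	 * instead of just dropping the accounting.
	 */
	static int mem_cgroup_force_empty_via_reclaim(struct mem_cgroup *mem)
	{
		/* Unlocked read of usage is fine as a termination check. */
		while (mem->res.usage > 0) {
			if (atomic_read(&mem->css.cgroup->count) > 0)
				return -EBUSY;
			/* Invented for this sketch: reclaim up to
			 * SWAP_CLUSTER_MAX pages charged to mem. */
			if (!mem_cgroup_shrink_usage(mem, SWAP_CLUSTER_MAX))
				return -ENOMEM;
			cond_resched();
		}
		return 0;
	}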

> +
> +int mem_cgroup_forced_uncharge_all(struct mem_cgroup *mem)
> +{
> +     int ret = -EBUSY;
> +     css_get(&mem->css);
> +     while (!list_empty(&mem->active_list) ||
> +            !list_empty(&mem->inactive_list)) {
> +             if (atomic_read(&mem->css.cgroup->count) > 0)
> +                     goto out;
> +             mem_cgroup_forced_uncharge_list(mem, &mem->active_list);
> +             mem_cgroup_forced_uncharge_list(mem, &mem->inactive_list);
>       }
> +     ret = 0;
> +out:
> +     css_put(&mem->css);
> +     return ret;
>  }
> 
>  int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
> @@ -494,7 +557,17 @@ static ssize_t mem_control_type_write(st
>       if (*end != '\0')
>               goto out_free;
> 
> -     if (tmp <= MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
> +     if (tmp == MEM_CGROUP_TYPE_UNSPEC) {
> +             if (atomic_read(&mem->css.cgroup->count) == 0)  /* uncharge all */
> +                     ret = mem_cgroup_forced_uncharge_all(mem);
> +             else
> +                     ret = -EBUSY;
> +             if (!ret)
> +                     ret = nbytes;
> +             goto out_free;
> +     }
> +

Can we use a different file for this? Something like
memory.force_reclaim or memory.force_out_memory?
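
To make that concrete, the separate file could be wired up roughly
like this (a sketch; the write hook follows the cftype signature used
elsewhere in this memcontrol.c, and mem_cgroup_from_cont() is the
existing lookup helper):

	static ssize_t mem_force_reclaim_write(struct cgroup *cont,
				struct cftype *cft, struct file *file,
				const char __user *userbuf,
				size_t nbytes, loff_t *ppos)
	{
		struct mem_cgroup *mem = mem_cgroup_from_cont(cont);

		/* Any write triggers the forced uncharge; the busy
		 * check lives in mem_cgroup_forced_uncharge_all(). */
		if (mem_cgroup_forced_uncharge_all(mem))
			return -EBUSY;
		return nbytes;
	}

	/* plus an extra entry in mem_cgroup_files[]: */
	{
		.name = "force_reclaim",
		.write = mem_force_reclaim_write,
	},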

> +     if (tmp < MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
>               goto out_free;
> 
>       mem->control_type = tmp;
> 

Overall, the patch looks good. I am going to stress-test it.

-- 
        Warm Regards,
        Balbir Singh
        Linux Technology Center
        IBM, ISTL