The following patch makes rmdir succeed against a memory cgroup.
Currently, rmdir against a memory cgroup fails if charged page caches remain
in the cgroup (even if no tasks are in the cgroup).
I don't like this. This patch implements a "forced" uncharge against a memory
cgroup.
Uncharged pages will be charged again when some other cgroup/task accesses them.
I wonder whether all unmapped page caches in a cgroup should be uncharged when
"1" is written to control_type.
I'd be glad to hear the memory controller's policy about "rmdir" against a
cgroup with no tasks.
Thanks,
-Kame
==
An experimental patch.
This patch adds an interface to uncharge all pages in a memory cgroup if
no tasks are in it. With this, a user can remove the cgroup by 'rmdir'.
To uncharge all remaining pages in a cgroup, echo -n 0 to memory.control_type.
(This is just for testing; please advise me on a better interface.
Having 'rmdir' do this automatically is one option, I think.)
The following is my session:
==
[EMAIL PROTECTED] kamezawa]# mkdir /opt/cgroup/group_A
[EMAIL PROTECTED] kamezawa]# bash
[EMAIL PROTECTED] kamezawa]# echo $$ > /opt/cgroup/group_A/tasks
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
122880
[EMAIL PROTECTED] kamezawa]# cp ./tmpfile tmpfile2
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
8597504
[EMAIL PROTECTED] kamezawa]# exit
exit
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
8454144
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/tasks
(*)[EMAIL PROTECTED] kamezawa]# echo -n 0 > /opt/cgroup/group_A/memory.control_type
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/memory.usage_in_bytes
0
[EMAIL PROTECTED] kamezawa]# cat /opt/cgroup/group_A/tasks
[EMAIL PROTECTED] kamezawa]# rmdir /opt/cgroup/group_A
[EMAIL PROTECTED] kamezawa]# exit
==
In the above case, a user can't remove group_A because of the 8454144 bytes of
page cache. By (*), all page caches are uncharged.
Uncharged pages will be charged again if some process accesses them later
(or they are dropped by kswapd).
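
By the way, the same sequence can be driven from a small program instead of
the shell. Below is only a minimal userspace sketch, assuming the
memory.control_type interface behaves as described above (writing "0" while
the group has no tasks forces the uncharge); the cgroup path is just the one
from my example session.

/* Minimal sketch, assuming memory.control_type works as described above:
 * write "0" to it while the group has no tasks, then rmdir the group.
 * The path below is the one from the example session, nothing fixed. */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

int main(void)
{
	const char *ctl = "/opt/cgroup/group_A/memory.control_type";
	const char *dir = "/opt/cgroup/group_A";
	int fd;

	fd = open(ctl, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* same as: echo -n 0 > memory.control_type */
	if (write(fd, "0", 1) != 1)
		perror("write");
	close(fd);

	/* should now succeed, since all remaining charges were dropped */
	if (rmdir(dir) < 0) {
		perror("rmdir");
		return 1;
	}
	return 0;
}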
P.S.
Is extra consideration needed for currently mapped pages (recharging them
immediately)?
Signed-off-by: KAMEZAWA Hiroyuki <[EMAIL PROTECTED]>
mm/memcontrol.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 83 insertions(+), 10 deletions(-)
Index: linux-2.6.23-rc8-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.23-rc8-mm1.orig/mm/memcontrol.c
+++ linux-2.6.23-rc8-mm1/mm/memcontrol.c
@@ -424,17 +424,80 @@ void mem_cgroup_uncharge(struct page_cgr
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
 		lock_page_cgroup(page);
-		mem = pc->mem_cgroup;
-		css_put(&mem->css);
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
+		pc = page_get_page_cgroup(page);
+		if (pc) {
+			mem = pc->mem_cgroup;
+			css_put(&mem->css);
+			page_assign_page_cgroup(page, NULL);
+			unlock_page_cgroup(page);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
+			spin_lock_irqsave(&mem->lru_lock, flags);
+			list_del_init(&pc->lru);
+			spin_unlock_irqrestore(&mem->lru_lock, flags);
+			kfree(pc);
+		} else
+			unlock_page_cgroup(page);
+	}
+}
+
+/*
+ * Uncharge pages by force. If a page is accessed again, it will be
+ * recharged by another cgroup.
+ *
+ * mem->lru_lock guarantees no race with mem_cgroup_isolate_pages().
+ * lock_page_cgroup() -> pc = page_get_page_cgroup() guarantees no race with
+ * mem_cgroup_uncharge().
+ */
-
-		spin_lock_irqsave(&mem->lru_lock, flags);
-		list_del_init(&pc->lru);
-		spin_unlock_irqrestore(&mem->lru_lock, flags);
-		kfree(pc);
+static void
+mem_cgroup_forced_uncharge_list(struct mem_cgroup *mem, struct list_head *src)
+{
+	struct page_cgroup *pc;
+	struct page *page;
+	int count = SWAP_CLUSTER_MAX;
+
+	spin_lock(&mem->lru_lock);
+	while (!list_empty(src)) {
+		pc = list_entry(src->prev, struct page_cgroup, lru);
+		/* When we uncharge a page, pc->page is not cleared before
+		   pc is removed from the LRU. But page->pc will be cleared. */
+		page = pc->page;
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		/* check race */
+		if (pc) {
+			css_put(&mem->css);
+			page_assign_page_cgroup(page, NULL);
+			unlock_page_cgroup(page);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
+			list_del_init(&pc->lru);
+			kfree(pc);
+		} else
+			unlock_page_cgroup(page);
+		if (--count == 0) {
+			spin_unlock(&mem->lru_lock);
+			cond_resched();
+			spin_lock(&mem->lru_lock);
+			count = SWAP_CLUSTER_MAX;
+		}
+	}
+	spin_unlock(&mem->lru_lock);
+}
+
+int mem_cgroup_forced_uncharge_all(struct mem_cgroup *mem)
+{
+	int ret = -EBUSY;
+
+	css_get(&mem->css);
+	while (!list_empty(&mem->active_list) ||
+	       !list_empty(&mem->inactive_list)) {
+		if (atomic_read(&mem->css.cgroup->count) > 0)
+			goto out;
+		mem_cgroup_forced_uncharge_list(mem, &mem->active_list);
+		mem_cgroup_forced_uncharge_list(mem, &mem->inactive_list);
 	}
+	ret = 0;
+out:
+	css_put(&mem->css);
+	return ret;
 }
 
 int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
@@ -494,7 +557,17 @@ static ssize_t mem_control_type_write(st
 	if (*end != '\0')
 		goto out_free;
 
-	if (tmp <= MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
+	if (tmp == MEM_CGROUP_TYPE_UNSPEC) {
+		if (atomic_read(&mem->css.cgroup->count) == 0) /* uncharge all */
+			ret = mem_cgroup_forced_uncharge_all(mem);
+		else
+			ret = -EBUSY;
+		if (!ret)
+			ret = nbytes;
+		goto out_free;
+	}
+
+	if (tmp < MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
 		goto out_free;
 
 	mem->control_type = tmp;