[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
On Tue, Mar 02, 2010 at 12:04:53PM +0200, Kirill A. Shutemov wrote: [snip] +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ + return -ENOMEM; Why ENOMEM? Probably, EINVAL or ENOSYS? OK, ENOSYS is more appropriate IMHO. +static s64 mem_cgroup_get_local_page_stat(struct mem_cgroup *memcg, + enum mem_cgroup_page_stat_item item) +{ + s64 ret; + + switch (item) { + case MEMCG_NR_DIRTYABLE_PAGES: + ret = res_counter_read_u64(memcg-res, RES_LIMIT) - + res_counter_read_u64(memcg-res, RES_USAGE); + /* Translate free memory in pages */ + ret = PAGE_SHIFT; + ret += mem_cgroup_read_stat(memcg, LRU_ACTIVE_FILE) + + mem_cgroup_read_stat(memcg, LRU_INACTIVE_FILE); + if (mem_cgroup_can_swap(memcg)) + ret += mem_cgroup_read_stat(memcg, LRU_ACTIVE_ANON) + + mem_cgroup_read_stat(memcg, LRU_INACTIVE_ANON); + break; + case MEMCG_NR_RECLAIM_PAGES: + ret = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_DIRTY) + + mem_cgroup_read_stat(memcg, + MEM_CGROUP_STAT_UNSTABLE_NFS); + break; + case MEMCG_NR_WRITEBACK: + ret = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + break; + case MEMCG_NR_DIRTY_WRITEBACK_PAGES: + ret = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK) + + mem_cgroup_read_stat(memcg, + MEM_CGROUP_STAT_UNSTABLE_NFS); + break; + default: + ret = 0; + WARN_ON_ONCE(1); I think it's a bug, not warning. OK. + } + return ret; +} + +static int mem_cgroup_page_stat_cb(struct mem_cgroup *mem, void *data) +{ + struct mem_cgroup_page_stat *stat = (struct mem_cgroup_page_stat *)data; + + stat-value += mem_cgroup_get_local_page_stat(mem, stat-item); + return 0; +} + +s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ + struct mem_cgroup_page_stat stat = {}; + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return -ENOMEM; EINVAL/ENOSYS? OK. + rcu_read_lock(); + memcg = mem_cgroup_from_task(current); + if (memcg) { + /* + * Recursively evaulate page statistics against all cgroup + * under hierarchy tree + */ + stat.item = item; + mem_cgroup_walk_tree(memcg, stat, mem_cgroup_page_stat_cb); + } else + stat.value = -ENOMEM; ditto. OK. + rcu_read_unlock(); + + return stat.value; +} + static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data) { int *val = data; @@ -1263,14 +1418,16 @@ static void record_last_oom(struct mem_cgroup *mem) } /* - * Currently used to update mapped file statistics, but the routine can be - * generalized to update other statistics as well. + * Generalized routine to update memory cgroup statistics. */ -void mem_cgroup_update_file_mapped(struct page *page, int val) +void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) EXPORT_SYMBOL_GPL(mem_cgroup_update_stat) is needed, since it uses by filesystems. Agreed. +static int +mem_cgroup_dirty_write(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); + int type = cft-private; + + if (cgrp-parent == NULL) + return -EINVAL; + if (((type == MEM_CGROUP_DIRTY_RATIO) || + (type == MEM_CGROUP_DIRTY_BACKGROUND_RATIO)) (val 100)) Too many unnecessary brackets if ((type == MEM_CGROUP_DIRTY_RATIO || type == MEM_CGROUP_DIRTY_BACKGROUND_RATIO) val 100) OK. Thanks, -Andrea ___ Containers mailing list contain...@lists.linux-foundation.org https://lists.linux-foundation.org/mailman/listinfo/containers ___ Devel mailing list Devel@openvz.org https://openvz.org/mailman/listinfo/devel
[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
Comments below. Yet to be tested on my end, but I will test it. On Mon, Mar 1, 2010 at 1:23 PM, Andrea Righi ari...@develer.com wrote: Infrastructure to account dirty pages per cgroup and add dirty limit interfaces in the cgroupfs: - Direct write-out: memory.dirty_ratio, memory.dirty_bytes - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes Signed-off-by: Andrea Righi ari...@develer.com --- include/linux/memcontrol.h | 77 ++- mm/memcontrol.c | 336 2 files changed, 384 insertions(+), 29 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f9b119..cc88b2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,12 +19,50 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H + +#include linux/writeback.h #include linux/cgroup.h + struct mem_cgroup; struct page_cgroup; struct page; struct mm_struct; +/* Cgroup memory statistics items exported to the kernel */ +enum mem_cgroup_page_stat_item { + MEMCG_NR_DIRTYABLE_PAGES, + MEMCG_NR_RECLAIM_PAGES, + MEMCG_NR_WRITEBACK, + MEMCG_NR_DIRTY_WRITEBACK_PAGES, +}; + +/* + * Statistics for memory cgroup. + */ +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. + */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ + MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out. + used by soft limit implementation */ + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out. + used by threshold implementation */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_WRITEBACK_TEMP, /* # of pages under writeback using + temporary buffers */ + MEM_CGROUP_STAT_UNSTABLE_NFS, /* # of NFS unstable pages */ + + MEM_CGROUP_STAT_NSTATS, +}; + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All charge functions with gfp_mask should use GFP_KERNEL or @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern int do_swap_account; #endif +extern long mem_cgroup_dirty_ratio(void); +extern unsigned long mem_cgroup_dirty_bytes(void); +extern long mem_cgroup_dirty_background_ratio(void); +extern unsigned long mem_cgroup_dirty_background_bytes(void); + +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item); + static inline bool mem_cgroup_disabled(void) { if (mem_cgroup_subsys.disabled) @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void) } extern bool mem_cgroup_oom_called(struct task_struct *task); -void mem_cgroup_update_file_mapped(struct page *page, int val); +void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val); unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, int nid, int zid); @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_file_mapped(struct page *page, - int val) +static inline void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) { } @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, return 0; } +static inline long mem_cgroup_dirty_ratio(void) +{ + return vm_dirty_ratio; +} + +static inline unsigned long mem_cgroup_dirty_bytes(void) +{ + return vm_dirty_bytes; +} + +static inline long mem_cgroup_dirty_background_ratio(void) +{ + return dirty_background_ratio; +} + +static inline unsigned long mem_cgroup_dirty_background_bytes(void) +{ + return dirty_background_bytes; +} + +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ + return -ENOMEM; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a443c30..e74cf66 100644 --- a/mm/memcontrol.c
[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
On Tue, Mar 02, 2010 at 06:32:24PM +0530, Balbir Singh wrote: [snip] +extern long mem_cgroup_dirty_ratio(void); +extern unsigned long mem_cgroup_dirty_bytes(void); +extern long mem_cgroup_dirty_background_ratio(void); +extern unsigned long mem_cgroup_dirty_background_bytes(void); + +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item); + Docstyle comments for each function would be appreciated OK. /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide @@ -205,6 +199,9 @@ struct mem_cgroup { unsigned intswappiness; + /* control memory cgroup dirty pages */ + unsigned long dirty_param[MEM_CGROUP_DIRTY_NPARAMS]; + Could you mention what protects this field, is it the reclaim_lock? Yes, it is. Actually, we could avoid the lock completely for dirty_param[], using a validation routine to check for incoherencies after any read with get_dirty_param(), and retry if the validation fails. In practice, the same approach we're using to read global vm_dirty_ratio, vm_dirty_bytes, etc... Considering that those values are rarely written and read often we can protect them in a RCU way. BTW, is unsigned long sufficient to represent dirty_param(s)? Yes, I think. It's the same type used for the equivalent global values. /* set when res.limit == memsw.limit */ boolmemsw_is_minimum; @@ -1021,6 +1018,164 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg) return swappiness; } +static unsigned long get_dirty_param(struct mem_cgroup *memcg, + enum mem_cgroup_dirty_param idx) +{ + unsigned long ret; + + VM_BUG_ON(idx = MEM_CGROUP_DIRTY_NPARAMS); + spin_lock(memcg-reclaim_param_lock); + ret = memcg-dirty_param[idx]; + spin_unlock(memcg-reclaim_param_lock); Do we need a spinlock if we protect it using RCU? Is precise data very important? See above. +unsigned long mem_cgroup_dirty_background_bytes(void) +{ + struct mem_cgroup *memcg; + unsigned long ret = dirty_background_bytes; + + if (mem_cgroup_disabled()) + return ret; + rcu_read_lock(); + memcg = mem_cgroup_from_task(current); + if (likely(memcg)) + ret = get_dirty_param(memcg, MEM_CGROUP_DIRTY_BACKGROUND_BYTES); + rcu_read_unlock(); + + return ret; +} + +static inline bool mem_cgroup_can_swap(struct mem_cgroup *memcg) +{ + return do_swap_account ? + res_counter_read_u64(memcg-memsw, RES_LIMIT) : Shouldn't you do a res_counter_read_u64(...) 0 for readability? OK. What happens if memcg-res, RES_LIMIT == memcg-memsw, RES_LIMIT? OK, we should also check memcg-memsw_is_minimum. static struct cgroup_subsys_state * __ref mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -3776,8 +4031,37 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) mem-last_scanned_child = 0; spin_lock_init(mem-reclaim_param_lock); - if (parent) + if (parent) { mem-swappiness = get_swappiness(parent); + + spin_lock(parent-reclaim_param_lock); + copy_dirty_params(mem, parent); + spin_unlock(parent-reclaim_param_lock); + } else { + /* +* XXX: should we need a lock here? we could switch from +* vm_dirty_ratio to vm_dirty_bytes or vice versa but we're not +* reading them atomically. The same for dirty_background_ratio +* and dirty_background_bytes. +* +* For now, try to read them speculatively and retry if a +* conflict is detected.a The do while loop is subtle, can we add a validate check,share it with the write routine and retry if validation fails? Agreed. +*/ + do { + mem-dirty_param[MEM_CGROUP_DIRTY_RATIO] = + vm_dirty_ratio; + mem-dirty_param[MEM_CGROUP_DIRTY_BYTES] = + vm_dirty_bytes; + } while (mem-dirty_param[MEM_CGROUP_DIRTY_RATIO] +mem-dirty_param[MEM_CGROUP_DIRTY_BYTES]); + do { + mem-dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_RATIO] = + dirty_background_ratio; + mem-dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_BYTES] = + dirty_background_bytes; + } while (mem-dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_RATIO] + mem-dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_BYTES]); + } atomic_set(mem-refcnt, 1); mem-move_charge_at_immigrate = 0; mutex_init(mem-thresholds_lock); Many thanks for reviewing, -Andrea
[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
On Tue, Mar 02, 2010 at 10:08:17AM -0800, Greg Thelen wrote: Comments below. Yet to be tested on my end, but I will test it. On Mon, Mar 1, 2010 at 1:23 PM, Andrea Righi ari...@develer.com wrote: Infrastructure to account dirty pages per cgroup and add dirty limit interfaces in the cgroupfs: - Direct write-out: memory.dirty_ratio, memory.dirty_bytes - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes Signed-off-by: Andrea Righi ari...@develer.com --- include/linux/memcontrol.h | 77 ++- mm/memcontrol.c | 336 2 files changed, 384 insertions(+), 29 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f9b119..cc88b2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,12 +19,50 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H + +#include linux/writeback.h #include linux/cgroup.h + struct mem_cgroup; struct page_cgroup; struct page; struct mm_struct; +/* Cgroup memory statistics items exported to the kernel */ +enum mem_cgroup_page_stat_item { + MEMCG_NR_DIRTYABLE_PAGES, + MEMCG_NR_RECLAIM_PAGES, + MEMCG_NR_WRITEBACK, + MEMCG_NR_DIRTY_WRITEBACK_PAGES, +}; + +/* + * Statistics for memory cgroup. + */ +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. + */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ + MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out. + used by soft limit implementation */ + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out. + used by threshold implementation */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_WRITEBACK_TEMP, /* # of pages under writeback using + temporary buffers */ + MEM_CGROUP_STAT_UNSTABLE_NFS, /* # of NFS unstable pages */ + + MEM_CGROUP_STAT_NSTATS, +}; + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All charge functions with gfp_mask should use GFP_KERNEL or @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern int do_swap_account; #endif +extern long mem_cgroup_dirty_ratio(void); +extern unsigned long mem_cgroup_dirty_bytes(void); +extern long mem_cgroup_dirty_background_ratio(void); +extern unsigned long mem_cgroup_dirty_background_bytes(void); + +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item); + static inline bool mem_cgroup_disabled(void) { if (mem_cgroup_subsys.disabled) @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void) } extern bool mem_cgroup_oom_called(struct task_struct *task); -void mem_cgroup_update_file_mapped(struct page *page, int val); +void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val); unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, int nid, int zid); @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_file_mapped(struct page *page, - int val) +static inline void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) { } @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, return 0; } +static inline long mem_cgroup_dirty_ratio(void) +{ + return vm_dirty_ratio; +} + +static inline unsigned long mem_cgroup_dirty_bytes(void) +{ + return vm_dirty_bytes; +} + +static inline long mem_cgroup_dirty_background_ratio(void) +{ + return dirty_background_ratio; +} + +static inline unsigned long mem_cgroup_dirty_background_bytes(void) +{ + return dirty_background_bytes; +} + +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ +
[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
On Mon, Mar 1, 2010 at 11:23 PM, Andrea Righi ari...@develer.com wrote: Infrastructure to account dirty pages per cgroup and add dirty limit interfaces in the cgroupfs: - Direct write-out: memory.dirty_ratio, memory.dirty_bytes - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes Signed-off-by: Andrea Righi ari...@develer.com --- include/linux/memcontrol.h | 77 ++- mm/memcontrol.c | 336 2 files changed, 384 insertions(+), 29 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f9b119..cc88b2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,12 +19,50 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H + +#include linux/writeback.h #include linux/cgroup.h + struct mem_cgroup; struct page_cgroup; struct page; struct mm_struct; +/* Cgroup memory statistics items exported to the kernel */ +enum mem_cgroup_page_stat_item { + MEMCG_NR_DIRTYABLE_PAGES, + MEMCG_NR_RECLAIM_PAGES, + MEMCG_NR_WRITEBACK, + MEMCG_NR_DIRTY_WRITEBACK_PAGES, +}; + +/* + * Statistics for memory cgroup. + */ +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. + */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ + MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out. + used by soft limit implementation */ + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out. + used by threshold implementation */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_WRITEBACK_TEMP, /* # of pages under writeback using + temporary buffers */ + MEM_CGROUP_STAT_UNSTABLE_NFS, /* # of NFS unstable pages */ + + MEM_CGROUP_STAT_NSTATS, +}; + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All charge functions with gfp_mask should use GFP_KERNEL or @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern int do_swap_account; #endif +extern long mem_cgroup_dirty_ratio(void); +extern unsigned long mem_cgroup_dirty_bytes(void); +extern long mem_cgroup_dirty_background_ratio(void); +extern unsigned long mem_cgroup_dirty_background_bytes(void); + +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item); + static inline bool mem_cgroup_disabled(void) { if (mem_cgroup_subsys.disabled) @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void) } extern bool mem_cgroup_oom_called(struct task_struct *task); -void mem_cgroup_update_file_mapped(struct page *page, int val); +void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val); unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, int nid, int zid); @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_file_mapped(struct page *page, - int val) +static inline void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) { } @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, return 0; } +static inline long mem_cgroup_dirty_ratio(void) +{ + return vm_dirty_ratio; +} + +static inline unsigned long mem_cgroup_dirty_bytes(void) +{ + return vm_dirty_bytes; +} + +static inline long mem_cgroup_dirty_background_ratio(void) +{ + return dirty_background_ratio; +} + +static inline unsigned long mem_cgroup_dirty_background_bytes(void) +{ + return dirty_background_bytes; +} + +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ + return -ENOMEM; Why ENOMEM? Probably, EINVAL or ENOSYS? +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a443c30..e74cf66 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c
[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
* Andrea Righi ari...@develer.com [2010-03-01 22:23:39]: Infrastructure to account dirty pages per cgroup and add dirty limit interfaces in the cgroupfs: - Direct write-out: memory.dirty_ratio, memory.dirty_bytes - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes Signed-off-by: Andrea Righi ari...@develer.com Look good, but yet to be tested from my side. --- include/linux/memcontrol.h | 77 ++- mm/memcontrol.c| 336 2 files changed, 384 insertions(+), 29 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f9b119..cc88b2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,12 +19,50 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H + +#include linux/writeback.h #include linux/cgroup.h + struct mem_cgroup; struct page_cgroup; struct page; struct mm_struct; +/* Cgroup memory statistics items exported to the kernel */ +enum mem_cgroup_page_stat_item { + MEMCG_NR_DIRTYABLE_PAGES, + MEMCG_NR_RECLAIM_PAGES, + MEMCG_NR_WRITEBACK, + MEMCG_NR_DIRTY_WRITEBACK_PAGES, +}; + +/* + * Statistics for memory cgroup. + */ +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. + */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ + MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out. + used by soft limit implementation */ + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out. + used by threshold implementation */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_WRITEBACK_TEMP, /* # of pages under writeback using + temporary buffers */ + MEM_CGROUP_STAT_UNSTABLE_NFS, /* # of NFS unstable pages */ + + MEM_CGROUP_STAT_NSTATS, +}; + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All charge functions with gfp_mask should use GFP_KERNEL or @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern int do_swap_account; #endif +extern long mem_cgroup_dirty_ratio(void); +extern unsigned long mem_cgroup_dirty_bytes(void); +extern long mem_cgroup_dirty_background_ratio(void); +extern unsigned long mem_cgroup_dirty_background_bytes(void); + +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item); + Docstyle comments for each function would be appreciated static inline bool mem_cgroup_disabled(void) { if (mem_cgroup_subsys.disabled) @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void) } extern bool mem_cgroup_oom_called(struct task_struct *task); -void mem_cgroup_update_file_mapped(struct page *page, int val); Good to see you make generic use of this function +void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val); unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, int nid, int zid); @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_file_mapped(struct page *page, - int val) +static inline void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) { } @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, return 0; } +static inline long mem_cgroup_dirty_ratio(void) +{ + return vm_dirty_ratio; +} + +static inline unsigned long mem_cgroup_dirty_bytes(void) +{ + return vm_dirty_bytes; +} + +static inline long mem_cgroup_dirty_background_ratio(void) +{ + return dirty_background_ratio; +} + +static inline unsigned long mem_cgroup_dirty_background_bytes(void) +{ + return dirty_background_bytes; +} + +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ + return -ENOMEM; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a443c30..e74cf66
[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure
On Mon, 1 Mar 2010 22:23:39 +0100 Andrea Righi ari...@develer.com wrote: Infrastructure to account dirty pages per cgroup and add dirty limit interfaces in the cgroupfs: - Direct write-out: memory.dirty_ratio, memory.dirty_bytes - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes Signed-off-by: Andrea Righi ari...@develer.com seems nice. You can add my ack. Acked-by: KAMEZAWA Hiroyuki kamezawa.hir...@jp.fujitsu.com But maybe you have to wait for a while. (some amounts of series of patches are posted...) please be patient. BTW, No TODO anymore ? Regards, -Kame --- include/linux/memcontrol.h | 77 ++- mm/memcontrol.c| 336 2 files changed, 384 insertions(+), 29 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1f9b119..cc88b2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -19,12 +19,50 @@ #ifndef _LINUX_MEMCONTROL_H #define _LINUX_MEMCONTROL_H + +#include linux/writeback.h #include linux/cgroup.h + struct mem_cgroup; struct page_cgroup; struct page; struct mm_struct; +/* Cgroup memory statistics items exported to the kernel */ +enum mem_cgroup_page_stat_item { + MEMCG_NR_DIRTYABLE_PAGES, + MEMCG_NR_RECLAIM_PAGES, + MEMCG_NR_WRITEBACK, + MEMCG_NR_DIRTY_WRITEBACK_PAGES, +}; + +/* + * Statistics for memory cgroup. + */ +enum mem_cgroup_stat_index { + /* + * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. + */ + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ + MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ + MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ + MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ + MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out. + used by soft limit implementation */ + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out. + used by threshold implementation */ + MEM_CGROUP_STAT_FILE_DIRTY, /* # of dirty pages in page cache */ + MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ + MEM_CGROUP_STAT_WRITEBACK_TEMP, /* # of pages under writeback using + temporary buffers */ + MEM_CGROUP_STAT_UNSTABLE_NFS, /* # of NFS unstable pages */ + + MEM_CGROUP_STAT_NSTATS, +}; + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* * All charge functions with gfp_mask should use GFP_KERNEL or @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern int do_swap_account; #endif +extern long mem_cgroup_dirty_ratio(void); +extern unsigned long mem_cgroup_dirty_bytes(void); +extern long mem_cgroup_dirty_background_ratio(void); +extern unsigned long mem_cgroup_dirty_background_bytes(void); + +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item); + static inline bool mem_cgroup_disabled(void) { if (mem_cgroup_subsys.disabled) @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void) } extern bool mem_cgroup_oom_called(struct task_struct *task); -void mem_cgroup_update_file_mapped(struct page *page, int val); +void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val); unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask, int nid, int zid); @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_file_mapped(struct page *page, - int val) +static inline void mem_cgroup_update_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) { } @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, return 0; } +static inline long mem_cgroup_dirty_ratio(void) +{ + return vm_dirty_ratio; +} + +static inline unsigned long mem_cgroup_dirty_bytes(void) +{ + return vm_dirty_bytes; +} + +static inline long mem_cgroup_dirty_background_ratio(void) +{ + return dirty_background_ratio; +} + +static inline unsigned long mem_cgroup_dirty_background_bytes(void) +{ + return dirty_background_bytes; +} + +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item) +{ + return -ENOMEM; +} + #endif /* CONFIG_CGROUP_MEM_CONT */