[Devel] [PATCH -mmotm 3/4] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
Infrastructure to account dirty pages per cgroup and to add dirty limit
interfaces to the cgroupfs:

 - Direct write-out: memory.dirty_ratio, memory.dirty_bytes

 - Background write-out: memory.dirty_background_ratio, 
memory.dirty_background_bytes

Signed-off-by: Andrea Righi <arighi@develer.com>
---
 include/linux/memcontrol.h |   80 -
 mm/memcontrol.c|  420 +++-
 2 files changed, 450 insertions(+), 50 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1f9b119..cc3421b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -19,12 +19,66 @@
 
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
+
+#include <linux/writeback.h>
#include <linux/cgroup.h>
+
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
 
+/* Cgroup memory statistics items exported to the kernel */
+enum mem_cgroup_page_stat_item {
+   MEMCG_NR_DIRTYABLE_PAGES,
+   MEMCG_NR_RECLAIM_PAGES,
+   MEMCG_NR_WRITEBACK,
+   MEMCG_NR_DIRTY_WRITEBACK_PAGES,
+};
+
+/* Dirty memory parameters */
+struct dirty_param {
+   int dirty_ratio;
+   unsigned long dirty_bytes;
+   int dirty_background_ratio;
+   unsigned long dirty_background_bytes;
+};
+
+/*
+ * Statistics for memory cgroup.
+ */
+enum mem_cgroup_stat_index {
+   /*
+* For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
+*/
+   MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
+   MEM_CGROUP_STAT_RSS,   /* # of pages charged as anon rss */
+   MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
+   MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
+   MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
+   MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
+   MEM_CGROUP_EVENTS,  /* incremented at every  pagein/pageout */
+   MEM_CGROUP_STAT_FILE_DIRTY,   /* # of dirty pages in page cache */
+   MEM_CGROUP_STAT_WRITEBACK,   /* # of pages under writeback */
+   MEM_CGROUP_STAT_WRITEBACK_TEMP,   /* # of pages under writeback using
+   temporary buffers */
+   MEM_CGROUP_STAT_UNSTABLE_NFS,   /* # of NFS unstable pages */
+
+   MEM_CGROUP_STAT_NSTATS,
+};
+
+/*
+ * TODO: provide a validation check routine. And retry if validation
+ * fails.
+ */
+static inline void get_global_dirty_param(struct dirty_param *param)
+{
+   param->dirty_ratio = vm_dirty_ratio;
+   param->dirty_bytes = vm_dirty_bytes;
+   param->dirty_background_ratio = dirty_background_ratio;
+   param->dirty_background_bytes = dirty_background_bytes;
+}
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All charge functions with gfp_mask should use GFP_KERNEL or
@@ -117,6 +171,10 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup 
*memcg,
 extern int do_swap_account;
 #endif
 
+extern bool mem_cgroup_has_dirty_limit(void);
+extern void get_dirty_param(struct dirty_param *param);
+extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
+
 static inline bool mem_cgroup_disabled(void)
 {
if (mem_cgroup_subsys.disabled)
@@ -125,7 +183,8 @@ static inline bool mem_cgroup_disabled(void)
 }
 
 extern bool mem_cgroup_oom_called(struct task_struct *task);
-void mem_cgroup_update_file_mapped(struct page *page, int val);
+void mem_cgroup_update_stat(struct page *page,
+   enum mem_cgroup_stat_index idx, int val);
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid,
int zid);
@@ -300,8 +359,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct 
task_struct *p)
 {
 }
 
-static inline void mem_cgroup_update_file_mapped(struct page *page,
-   int val)
+static inline void mem_cgroup_update_stat(struct page *page,
+   enum mem_cgroup_stat_index idx, int val)
 {
 }
 
@@ -312,6 +371,21 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone 
*zone, int order,
return 0;
 }
 
+static inline bool mem_cgroup_has_dirty_limit(void)
+{
+   return false;
+}
+
+static inline void get_dirty_param(struct dirty_param *param)
+{
+   get_global_dirty_param(param);
+}
+
+static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
+{
+   return -ENOSYS;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 497b6f7..9842e7b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,28 +73,23 @@ static int really_do_swap_account __initdata = 1; /* for 
remember boot option*/
 #define THRESHOLDS_EVENTS_THRESH (7) /* once in 128 */
 #define SOFTLIMIT_EVENTS_THRESH (10) /* once in 1024 */
 
-/*
- * Statistics for memory cgroup.
- */
-enum 

[Devel] [PATCH -mmotm 3/4] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
Infrastructure to account dirty pages per cgroup and to add dirty limit
interface to the cgroupfs:

 - Direct write-out: memory.dirty_ratio, memory.dirty_bytes

 - Background write-out: memory.dirty_background_ratio, 
memory.dirty_background_bytes

Signed-off-by: Andrea Righi <arighi@develer.com>
---
 include/linux/memcontrol.h |  122 +++-
 mm/memcontrol.c|  507 +---
 2 files changed, 593 insertions(+), 36 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44301c6..61fdca4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -19,12 +19,55 @@
 
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
+
+#include <linux/writeback.h>
#include <linux/cgroup.h>
+
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
 
+/* Cgroup memory statistics items exported to the kernel */
+enum mem_cgroup_read_page_stat_item {
+   MEMCG_NR_DIRTYABLE_PAGES,
+   MEMCG_NR_RECLAIM_PAGES,
+   MEMCG_NR_WRITEBACK,
+   MEMCG_NR_DIRTY_WRITEBACK_PAGES,
+};
+
+/* File cache pages accounting */
+enum mem_cgroup_write_page_stat_item {
+   MEMCG_NR_FILE_MAPPED,   /* # of pages charged as file rss */
+   MEMCG_NR_FILE_DIRTY,/* # of dirty pages in page cache */
+   MEMCG_NR_FILE_WRITEBACK,/* # of pages under writeback */
+   MEMCG_NR_FILE_WRITEBACK_TEMP,   /* # of pages under writeback using
+  temporary buffers */
+   MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */
+
+   MEMCG_NR_FILE_NSTAT,
+};
+
+/* Dirty memory parameters */
+struct vm_dirty_param {
+   int dirty_ratio;
+   int dirty_background_ratio;
+   unsigned long dirty_bytes;
+   unsigned long dirty_background_bytes;
+};
+
+/*
+ * TODO: provide a validation check routine. And retry if validation
+ * fails.
+ */
+static inline void get_global_vm_dirty_param(struct vm_dirty_param *param)
+{
+   param->dirty_ratio = vm_dirty_ratio;
+   param->dirty_bytes = vm_dirty_bytes;
+   param->dirty_background_ratio = dirty_background_ratio;
+   param->dirty_background_bytes = dirty_background_bytes;
+}
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All charge functions with gfp_mask should use GFP_KERNEL or
@@ -117,6 +160,40 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup 
*memcg,
 extern int do_swap_account;
 #endif
 
+extern bool mem_cgroup_has_dirty_limit(void);
+extern void get_vm_dirty_param(struct vm_dirty_param *param);
+extern s64 mem_cgroup_page_stat(enum mem_cgroup_read_page_stat_item item);
+
+extern void mem_cgroup_update_page_stat_locked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx, bool charge);
+
+extern void mem_cgroup_update_page_stat_unlocked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx, bool charge);
+
+static inline void mem_cgroup_inc_page_stat_locked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat_locked(page, idx, true);
+}
+
+static inline void mem_cgroup_dec_page_stat_locked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat_locked(page, idx, false);
+}
+
+static inline void mem_cgroup_inc_page_stat_unlocked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat_unlocked(page, idx, true);
+}
+
+static inline void mem_cgroup_dec_page_stat_unlocked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat_unlocked(page, idx, false);
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
if (mem_cgroup_subsys.disabled)
@@ -124,7 +201,6 @@ static inline bool mem_cgroup_disabled(void)
return false;
 }
 
-void mem_cgroup_update_file_mapped(struct page *page, int val);
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid,
int zid);
@@ -294,8 +370,38 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct 
task_struct *p)
 {
 }
 
-static inline void mem_cgroup_update_file_mapped(struct page *page,
-   int val)
+static inline s64 mem_cgroup_page_stat(enum mem_cgroup_read_page_stat_item 
item)
+{
+   return -ENOSYS;
+}
+
+static inline void mem_cgroup_update_page_stat_locked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx, bool charge)
+{
+}
+
+static inline void mem_cgroup_update_page_stat_unlocked(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx, bool charge)
+{
+}
+
+static inline void mem_cgroup_inc_page_stat_locked(struct page *page,
+   enum