[Devel] [PATCH -mmotm 4/5] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
Infrastructure to account dirty pages per cgroup and add dirty limit
interfaces in the cgroupfs:

 - Direct write-out: memory.dirty_ratio, memory.dirty_bytes

 - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes

Signed-off-by: Andrea Righi <ari...@develer.com>
---
 include/linux/memcontrol.h |   87 +-
 mm/memcontrol.c|  432 
 2 files changed, 480 insertions(+), 39 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44301c6..0602ec9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -19,12 +19,55 @@
 
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
+
+#include <linux/writeback.h>
 #include <linux/cgroup.h>
+
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
 
+/* Cgroup memory statistics items exported to the kernel */
+enum mem_cgroup_read_page_stat_item {
+   MEMCG_NR_DIRTYABLE_PAGES,
+   MEMCG_NR_RECLAIM_PAGES,
+   MEMCG_NR_WRITEBACK,
+   MEMCG_NR_DIRTY_WRITEBACK_PAGES,
+};
+
+/* File cache pages accounting */
+enum mem_cgroup_write_page_stat_item {
+   MEMCG_NR_FILE_MAPPED,   /* # of pages charged as file rss */
+   MEMCG_NR_FILE_DIRTY,/* # of dirty pages in page cache */
+   MEMCG_NR_FILE_WRITEBACK,/* # of pages under writeback */
+   MEMCG_NR_FILE_WRITEBACK_TEMP,   /* # of pages under writeback using
+  temporary buffers */
+   MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */
+
+   MEMCG_NR_FILE_NSTAT,
+};
+
+/* Dirty memory parameters */
+struct vm_dirty_param {
+   int dirty_ratio;
+   int dirty_background_ratio;
+   unsigned long dirty_bytes;
+   unsigned long dirty_background_bytes;
+};
+
+/*
+ * TODO: provide a validation check routine. And retry if validation
+ * fails.
+ */
+static inline void get_global_vm_dirty_param(struct vm_dirty_param *param)
+{
+   param->dirty_ratio = vm_dirty_ratio;
+   param->dirty_bytes = vm_dirty_bytes;
+   param->dirty_background_ratio = dirty_background_ratio;
+   param->dirty_background_bytes = dirty_background_bytes;
+}
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All charge functions with gfp_mask should use GFP_KERNEL or
@@ -117,6 +160,25 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern int do_swap_account;
 #endif
 
+extern bool mem_cgroup_has_dirty_limit(void);
+extern void get_vm_dirty_param(struct vm_dirty_param *param);
+extern s64 mem_cgroup_page_stat(enum mem_cgroup_read_page_stat_item item);
+
+extern void mem_cgroup_update_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx, bool charge);
+
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat(page, idx, true);
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat(page, idx, false);
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
if (mem_cgroup_subsys.disabled)
@@ -124,7 +186,6 @@ static inline bool mem_cgroup_disabled(void)
return false;
 }
 
-void mem_cgroup_update_file_mapped(struct page *page, int val);
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid,
int zid);
@@ -294,8 +355,18 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void mem_cgroup_update_file_mapped(struct page *page,
-   int val)
+static inline s64 mem_cgroup_page_stat(enum mem_cgroup_read_page_stat_item item)
+{
+   return -ENOSYS;
+}
+
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
 {
 }
 
@@ -306,6 +377,16 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
return 0;
 }
 
+static inline bool mem_cgroup_has_dirty_limit(void)
+{
+   return false;
+}
+
+static inline void get_vm_dirty_param(struct vm_dirty_param *param)
+{
+   get_global_vm_dirty_param(param);
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a9fd736..ffcf37c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,14 +80,21 @@ enum mem_cgroup_stat_index {
/*
 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 */
-   MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
+   MEM_CGROUP_STAT_CACHE,   

[Devel] [PATCH -mmotm 4/5] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
Infrastructure to account dirty pages per cgroup and add dirty limit
interfaces in the cgroupfs:

 - Direct write-out: memory.dirty_ratio, memory.dirty_bytes

 - Background write-out: memory.dirty_background_ratio, memory.dirty_background_bytes

Signed-off-by: Andrea Righi <ari...@develer.com>
---
 include/linux/memcontrol.h |   92 -
 mm/memcontrol.c|  484 +---
 2 files changed, 540 insertions(+), 36 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 88d3f9e..0602ec9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -19,12 +19,55 @@
 
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
+
+#include <linux/writeback.h>
 #include <linux/cgroup.h>
+
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
 
+/* Cgroup memory statistics items exported to the kernel */
+enum mem_cgroup_read_page_stat_item {
+   MEMCG_NR_DIRTYABLE_PAGES,
+   MEMCG_NR_RECLAIM_PAGES,
+   MEMCG_NR_WRITEBACK,
+   MEMCG_NR_DIRTY_WRITEBACK_PAGES,
+};
+
+/* File cache pages accounting */
+enum mem_cgroup_write_page_stat_item {
+   MEMCG_NR_FILE_MAPPED,   /* # of pages charged as file rss */
+   MEMCG_NR_FILE_DIRTY,/* # of dirty pages in page cache */
+   MEMCG_NR_FILE_WRITEBACK,/* # of pages under writeback */
+   MEMCG_NR_FILE_WRITEBACK_TEMP,   /* # of pages under writeback using
+  temporary buffers */
+   MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */
+
+   MEMCG_NR_FILE_NSTAT,
+};
+
+/* Dirty memory parameters */
+struct vm_dirty_param {
+   int dirty_ratio;
+   int dirty_background_ratio;
+   unsigned long dirty_bytes;
+   unsigned long dirty_background_bytes;
+};
+
+/*
+ * TODO: provide a validation check routine. And retry if validation
+ * fails.
+ */
+static inline void get_global_vm_dirty_param(struct vm_dirty_param *param)
+{
+   param->dirty_ratio = vm_dirty_ratio;
+   param->dirty_bytes = vm_dirty_bytes;
+   param->dirty_background_ratio = dirty_background_ratio;
+   param->dirty_background_bytes = dirty_background_bytes;
+}
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All charge functions with gfp_mask should use GFP_KERNEL or
@@ -117,6 +160,25 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern int do_swap_account;
 #endif
 
+extern bool mem_cgroup_has_dirty_limit(void);
+extern void get_vm_dirty_param(struct vm_dirty_param *param);
+extern s64 mem_cgroup_page_stat(enum mem_cgroup_read_page_stat_item item);
+
+extern void mem_cgroup_update_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx, bool charge);
+
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat(page, idx, true);
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+   mem_cgroup_update_page_stat(page, idx, false);
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
if (mem_cgroup_subsys.disabled)
@@ -124,12 +186,6 @@ static inline bool mem_cgroup_disabled(void)
return false;
 }
 
-enum mem_cgroup_page_stat_item {
-   MEMCG_NR_FILE_MAPPED,
-   MEMCG_NR_FILE_NSTAT,
-};
-
-void mem_cgroup_update_stat(struct page *page, int idx, bool charge);
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask, int nid,
int zid);
@@ -299,8 +355,18 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void mem_cgroup_update_file_mapped(struct page *page,
-   int val)
+static inline s64 mem_cgroup_page_stat(enum mem_cgroup_read_page_stat_item item)
+{
+   return -ENOSYS;
+}
+
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
+{
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+   enum mem_cgroup_write_page_stat_item idx)
 {
 }
 
@@ -311,6 +377,16 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
return 0;
 }
 
+static inline bool mem_cgroup_has_dirty_limit(void)
+{
+   return false;
+}
+
+static inline void get_vm_dirty_param(struct vm_dirty_param *param)
+{
+   get_global_vm_dirty_param(param);
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b7c23ea..91770d0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -80,14 +80,21 @@ enum mem_cgroup_stat_index {
/*
 * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.