[Devel] [PATCH -mmotm 5/5] memcg: dirty pages instrumentation

2010-03-30 Thread Andrea Righi
Apply the cgroup dirty pages accounting and limiting infrastructure to
the appropriate kernel functions.
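
[ For illustration only, not part of the patch: each instrumented call
site below pairs the new per-cgroup counter update with the existing
zone and bdi updates, under the same conditions. The helper below is
hypothetical and only shows the pattern; the individual calls are the
ones used throughout this patch. ]

static void memcg_account_dirty_page(struct page *page,
				     struct address_space *mapping)
{
	if (mapping_cap_account_dirty(mapping)) {
		/* per-cgroup counter, updated alongside the global ones */
		mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_DIRTY);
		inc_zone_page_state(page, NR_FILE_DIRTY);
		inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
	}
}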

[ NOTE: for now we do not account WritebackTmp pages (FUSE) or NILFS2
bounce pages. This depends on also charging bounce pages per cgroup. ]

As a bonus, make determine_dirtyable_memory() static again: this
function isn't used anymore outside page writeback.

Signed-off-by: Andrea Righi ari...@develer.com
---
 fs/nfs/write.c            |    4 +
 include/linux/writeback.h |    2 -
 mm/filemap.c              |    1 +
 mm/page-writeback.c       |  215 -
 mm/rmap.c                 |    4 +-
 mm/truncate.c             |    1 +
 6 files changed, 141 insertions(+), 86 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 53ff70e..3e8b9f8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -440,6 +440,7 @@ nfs_mark_request_commit(struct nfs_page *req)
NFS_PAGE_TAG_COMMIT);
nfsi->ncommit++;
spin_unlock(&inode->i_lock);
+   mem_cgroup_inc_page_stat(req->wb_page, MEMCG_NR_FILE_UNSTABLE_NFS);
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -451,6 +452,7 @@ nfs_clear_request_commit(struct nfs_page *req)
struct page *page = req->wb_page;

if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
+   mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_UNSTABLE_NFS);
dec_zone_page_state(page, NR_UNSTABLE_NFS);
dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
return 1;
@@ -1277,6 +1279,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_mark_request_commit(req);
+   mem_cgroup_dec_page_stat(req->wb_page,
+   MEMCG_NR_FILE_UNSTABLE_NFS);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
BDI_RECLAIMABLE);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index dd9512d..39e4cb2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -117,8 +117,6 @@ extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
 
-extern unsigned long determine_dirtyable_memory(void);
-
 extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
diff --git a/mm/filemap.c b/mm/filemap.c
index 62cbac0..bd833fe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -135,6 +135,7 @@ void __remove_from_page_cache(struct page *page)
 * having removed the page entirely.
 */
if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
+   mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_DIRTY);
dec_zone_page_state(page, NR_FILE_DIRTY);
dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ab84693..fcac9b4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -131,6 +131,111 @@ static struct prop_descriptor vm_completions;
 static struct prop_descriptor vm_dirties;
 
 /*
+ * Work out the current dirty-memory clamping and background writeout
+ * thresholds.
+ *
+ * The main aim here is to lower them aggressively if there is a lot of mapped
+ * memory around.  To avoid stressing page reclaim with lots of unreclaimable
+ * pages.  It is better to clamp down on writers than to start swapping, and
+ * performing lots of scanning.
+ *
+ * We only allow 1/2 of the currently-unmapped memory to be dirtied.
+ *
+ * We don't permit the clamping level to fall below 5% - that is getting rather
+ * excessive.
+ *
+ * We make sure that the background writeout level is below the adjusted
+ * clamping level.
+ */
+
+static unsigned long highmem_dirtyable_memory(unsigned long total)
+{
+#ifdef CONFIG_HIGHMEM
+   int node;
+   unsigned long x = 0;
+
+   for_each_node_state(node, N_HIGH_MEMORY) {
+   struct zone *z =
+   &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+
+   x += zone_page_state(z, NR_FREE_PAGES) +
+zone_reclaimable_pages(z);
+   }
+   /*
+* Make sure that the number of highmem pages is never larger
+* than the number of the total dirtyable memory. This can only
+* occur in very strange VM situations but we want to make sure
+* that this does not occur.
+*/
+   return min(x, total);
+#else
+   return 0;
+#endif
+}
+
+static unsigned long get_global_dirtyable_memory(void)
+{
+   unsigned long memory;
+
+   memory = 

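[ Sketch only, not part of the patch: a rough illustration of how the
clamping described in the comment above turns into actual thresholds.
It mirrors the existing get_dirty_limits() logic (ignoring the
vm_dirty_bytes / dirty_background_bytes variants); the function name is
made up, and get_global_dirtyable_memory() is the helper introduced
above. ]

static void example_global_dirty_limits(unsigned long *pbackground,
					unsigned long *pdirty)
{
	unsigned long available = get_global_dirtyable_memory();
	int ratio = vm_dirty_ratio;
	unsigned long dirty, background;

	if (ratio < 5)			/* never clamp below 5% */
		ratio = 5;
	dirty = (ratio * available) / 100;

	background = (dirty_background_ratio * available) / 100;
	if (background >= dirty)	/* keep background writeout below the clamp */
		background = dirty / 2;

	*pbackground = background;
	*pdirty = dirty;
}

[ With, say, 8 GiB of dirtyable memory and vm_dirty_ratio below 5, the
dirty threshold still comes out at roughly 5% of 8 GiB ~= 410 MiB. ]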