[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-30 Thread Andrea Righi
On Thu, Mar 04, 2010 at 11:18:28AM -0500, Vivek Goyal wrote:
 On Thu, Mar 04, 2010 at 11:40:15AM +0100, Andrea Righi wrote:
 
 [..]
  diff --git a/mm/page-writeback.c b/mm/page-writeback.c
  index 5a0f8f3..c5d14ea 100644
  --- a/mm/page-writeback.c
  +++ b/mm/page-writeback.c
  @@ -137,13 +137,16 @@ static struct prop_descriptor vm_dirties;
*/
   static int calc_period_shift(void)
   {
  +   struct dirty_param dirty_param;
  unsigned long dirty_total;
   
  -   if (vm_dirty_bytes)
  -   dirty_total = vm_dirty_bytes / PAGE_SIZE;
  +   get_dirty_param(dirty_param);
  +
  +   if (dirty_param.dirty_bytes)
  +   dirty_total = dirty_param.dirty_bytes / PAGE_SIZE;
  else
  -   dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
  -   100;
  +   dirty_total = (dirty_param.dirty_ratio *
  +   determine_dirtyable_memory()) / 100;
  return 2 + ilog2(dirty_total - 1);
   }
   
  @@ -408,41 +411,46 @@ static unsigned long 
  highmem_dirtyable_memory(unsigned long total)
*/
   unsigned long determine_dirtyable_memory(void)
   {
  -   unsigned long x;
  -
  -   x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
  +   unsigned long memory;
  +   s64 memcg_memory;
   
  +   memory = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
  if (!vm_highmem_is_dirtyable)
  -   x -= highmem_dirtyable_memory(x);
  -
  -   return x + 1;   /* Ensure that we never return 0 */
  +   memory -= highmem_dirtyable_memory(memory);
  +   if (mem_cgroup_has_dirty_limit())
  +   return memory + 1;
 
 Should above be?
   if (!mem_cgroup_has_dirty_limit())
   return memory + 1;

Very true.

I'll post another patch with this and Kirill's fixes.

Thanks,
-Andrea

 
 Vivek
 
  +   memcg_memory = mem_cgroup_page_stat(MEMCG_NR_DIRTYABLE_PAGES);
  +   return min((unsigned long)memcg_memory, memory + 1);
   }
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-30 Thread Andrea Righi
On Thu, Mar 04, 2010 at 02:41:44PM -0500, Vivek Goyal wrote:
 On Thu, Mar 04, 2010 at 11:40:15AM +0100, Andrea Righi wrote:
 
 [..]
  diff --git a/mm/page-writeback.c b/mm/page-writeback.c
  index 5a0f8f3..c5d14ea 100644
  --- a/mm/page-writeback.c
  +++ b/mm/page-writeback.c
  @@ -137,13 +137,16 @@ static struct prop_descriptor vm_dirties;
*/
   static int calc_period_shift(void)
   {
  +   struct dirty_param dirty_param;
  unsigned long dirty_total;
   
  -   if (vm_dirty_bytes)
  -   dirty_total = vm_dirty_bytes / PAGE_SIZE;
  +   get_dirty_param(dirty_param);
  +
  +   if (dirty_param.dirty_bytes)
  +   dirty_total = dirty_param.dirty_bytes / PAGE_SIZE;
  else
  -   dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
  -   100;
  +   dirty_total = (dirty_param.dirty_ratio *
  +   determine_dirtyable_memory()) / 100;
  return 2 + ilog2(dirty_total - 1);
   }
   
 
 Hmm.., I have been staring at this for some time and I think something is
 wrong. I don't fully understand the way floating proportions are working
 but this function seems to be calculating the period over which we need
 to measure the proportions. (vm_completion proportion and vm_dirties
 proportions).
 
 And we use this period (shift) when the admin updates dirty_ratio or dirty_bytes
 etc. In that case we recalculate the global dirty limit and take log2 and
 use that as period over which we monitor and calculate proportions.
 
 If yes, then it should be global and not per cgroup (because all our 
 accounting of bdi completion is global and not per cgroup).
 
 PeterZ, can tell us more about it. I am just raising the flag here to be
 sure.
 
 Thanks
 Vivek

Hi Vivek,

I tend to agree, we must use global dirty values here.

BTW, update_completion_period() is called from dirty_* handlers, so it's
totally unrelated to use the current memcg. That's the memcg where the
admin is running, so probably it's the root memcg almost all the time,
but it's wrong in principle. In conclusion this patch shouldn't touch
calc_period_shift().

Thanks,
-Andrea
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-30 Thread Andrea Righi
On Fri, Mar 05, 2010 at 12:08:43PM +0530, Balbir Singh wrote:
 * Andrea Righi ari...@develer.com [2010-03-04 11:40:15]:
 
  Apply the cgroup dirty pages accounting and limiting infrastructure
  to the opportune kernel functions.
  
  Signed-off-by: Andrea Righi ari...@develer.com
  ---
   fs/fuse/file.c  |5 +++
   fs/nfs/write.c  |4 ++
   fs/nilfs2/segment.c |   11 +-
   mm/filemap.c|1 +
   mm/page-writeback.c |   91 
  ++-
   mm/rmap.c   |4 +-
   mm/truncate.c   |2 +
   7 files changed, 84 insertions(+), 34 deletions(-)
  
  diff --git a/fs/fuse/file.c b/fs/fuse/file.c
  index a9f5e13..dbbdd53 100644
  --- a/fs/fuse/file.c
  +++ b/fs/fuse/file.c
  @@ -11,6 +11,7 @@
   #include linux/pagemap.h
   #include linux/slab.h
   #include linux/kernel.h
  +#include linux/memcontrol.h
   #include linux/sched.h
   #include linux/module.h
  
  @@ -1129,6 +1130,8 @@ static void fuse_writepage_finish(struct fuse_conn 
  *fc, struct fuse_req *req)
  
  list_del(req-writepages_entry);
  dec_bdi_stat(bdi, BDI_WRITEBACK);
  +   mem_cgroup_update_stat(req-pages[0],
  +   MEM_CGROUP_STAT_WRITEBACK_TEMP, -1);
  dec_zone_page_state(req-pages[0], NR_WRITEBACK_TEMP);
  bdi_writeout_inc(bdi);
  wake_up(fi-page_waitq);
  @@ -1240,6 +1243,8 @@ static int fuse_writepage_locked(struct page *page)
  req-inode = inode;
  
  inc_bdi_stat(mapping-backing_dev_info, BDI_WRITEBACK);
  +   mem_cgroup_update_stat(tmp_page,
  +   MEM_CGROUP_STAT_WRITEBACK_TEMP, 1);
  inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
  end_page_writeback(page);
  
  diff --git a/fs/nfs/write.c b/fs/nfs/write.c
  index b753242..7316f7a 100644
  --- a/fs/nfs/write.c
  +++ b/fs/nfs/write.c
  @@ -439,6 +439,7 @@ nfs_mark_request_commit(struct nfs_page *req)
  req-wb_index,
  NFS_PAGE_TAG_COMMIT);
  spin_unlock(inode-i_lock);
  +   mem_cgroup_update_stat(req-wb_page, MEM_CGROUP_STAT_UNSTABLE_NFS, 1);
  inc_zone_page_state(req-wb_page, NR_UNSTABLE_NFS);
  inc_bdi_stat(req-wb_page-mapping-backing_dev_info, BDI_UNSTABLE);
  __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
  @@ -450,6 +451,7 @@ nfs_clear_request_commit(struct nfs_page *req)
  struct page *page = req-wb_page;
  
  if (test_and_clear_bit(PG_CLEAN, (req)-wb_flags)) {
  +   mem_cgroup_update_stat(page, MEM_CGROUP_STAT_UNSTABLE_NFS, -1);
  dec_zone_page_state(page, NR_UNSTABLE_NFS);
  dec_bdi_stat(page-mapping-backing_dev_info, BDI_UNSTABLE);
  return 1;
  @@ -1273,6 +1275,8 @@ nfs_commit_list(struct inode *inode, struct list_head 
  *head, int how)
  req = nfs_list_entry(head-next);
  nfs_list_remove_request(req);
  nfs_mark_request_commit(req);
  +   mem_cgroup_update_stat(req-wb_page,
  +   MEM_CGROUP_STAT_UNSTABLE_NFS, -1);
  dec_zone_page_state(req-wb_page, NR_UNSTABLE_NFS);
  dec_bdi_stat(req-wb_page-mapping-backing_dev_info,
  BDI_UNSTABLE);
  diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
  index ada2f1b..27a01b1 100644
  --- a/fs/nilfs2/segment.c
  +++ b/fs/nilfs2/segment.c
  @@ -24,6 +24,7 @@
   #include linux/pagemap.h
   #include linux/buffer_head.h
   #include linux/writeback.h
  +#include linux/memcontrol.h
   #include linux/bio.h
   #include linux/completion.h
   #include linux/blkdev.h
  @@ -1660,8 +1661,11 @@ nilfs_copy_replace_page_buffers(struct page *page, 
  struct list_head *out)
  } while (bh = bh-b_this_page, bh2 = bh2-b_this_page, bh != head);
  kunmap_atomic(kaddr, KM_USER0);
  
  -   if (!TestSetPageWriteback(clone_page))
  +   if (!TestSetPageWriteback(clone_page)) {
  +   mem_cgroup_update_stat(clone_page,
  +   MEM_CGROUP_STAT_WRITEBACK, 1);
 
 I wonder if we should start implementing inc and dec to avoid passing
 the +1 and -1 parameters. It should make the code easier to read.

OK, it's always +1/-1, and I don't see any case where we should use
different numbers. So, better to move to the inc/dec naming.

 
  inc_zone_page_state(clone_page, NR_WRITEBACK);
  +   }
  unlock_page(clone_page);
  
  return 0;
  @@ -1783,8 +1787,11 @@ static void __nilfs_end_page_io(struct page *page, 
  int err)
  }
  
  if (buffer_nilfs_allocated(page_buffers(page))) {
  -   if (TestClearPageWriteback(page))
  +   if (TestClearPageWriteback(page)) {
  +   mem_cgroup_update_stat(page,
  +   MEM_CGROUP_STAT_WRITEBACK, -1);
  dec_zone_page_state(page, NR_WRITEBACK);
  +   }
  } else
  end_page_writeback(page);
   }
  diff --git a/mm/filemap.c b/mm/filemap.c
  index fe09e51..f85acae 100644
  --- a/mm/filemap.c
  +++ b/mm/filemap.c
  @@ 

[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-07 Thread KAMEZAWA Hiroyuki
On Sun,  7 Mar 2010 21:57:54 +0100
Andrea Righi ari...@develer.com wrote:

 Apply the cgroup dirty pages accounting and limiting infrastructure to
 the opportune kernel functions.
 
 As a bonus, make determine_dirtyable_memory() static again: this
 function isn't used anymore outside page writeback.
 
 Signed-off-by: Andrea Righi ari...@develer.com

I'm sorry if I misunderstand..almost all this kind of accounting is done
under lock_page()...then...


 ---
  fs/fuse/file.c|5 +
  fs/nfs/write.c|6 +
  fs/nilfs2/segment.c   |   11 ++-
  include/linux/writeback.h |2 -
  mm/filemap.c  |1 +
  mm/page-writeback.c   |  224 
 -
  mm/rmap.c |4 +-
  mm/truncate.c |2 +
  8 files changed, 165 insertions(+), 90 deletions(-)
 
 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
 index a9f5e13..9a542e5 100644
 --- a/fs/fuse/file.c
 +++ b/fs/fuse/file.c
 @@ -11,6 +11,7 @@
  #include linux/pagemap.h
  #include linux/slab.h
  #include linux/kernel.h
 +#include linux/memcontrol.h
  #include linux/sched.h
  #include linux/module.h
  
 @@ -1129,6 +1130,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, 
 struct fuse_req *req)
  
   list_del(req-writepages_entry);
   dec_bdi_stat(bdi, BDI_WRITEBACK);
 + mem_cgroup_dec_page_stat_unlocked(req-pages[0],
 + MEMCG_NR_FILE_WRITEBACK_TEMP);
   dec_zone_page_state(req-pages[0], NR_WRITEBACK_TEMP);

Hmm. IIUC, this req-pages[0] is tmp_page, which works as bounce_buffer for 
FUSE.
Then, this req-pages[] is not under any memcg.
So, this accounting never work.


   bdi_writeout_inc(bdi);
   wake_up(fi-page_waitq);
 @@ -1240,6 +1243,8 @@ static int fuse_writepage_locked(struct page *page)
   req-inode = inode;
  
   inc_bdi_stat(mapping-backing_dev_info, BDI_WRITEBACK);
 + mem_cgroup_inc_page_stat_unlocked(tmp_page,
 + MEMCG_NR_FILE_WRITEBACK_TEMP);
   inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
   end_page_writeback(page);
ditto.


  
 diff --git a/fs/nfs/write.c b/fs/nfs/write.c
 index 53ff70e..a35e3c0 100644
 --- a/fs/nfs/write.c
 +++ b/fs/nfs/write.c
 @@ -440,6 +440,8 @@ nfs_mark_request_commit(struct nfs_page *req)
   NFS_PAGE_TAG_COMMIT);
   nfsi-ncommit++;
   spin_unlock(inode-i_lock);
 + mem_cgroup_inc_page_stat_unlocked(req-wb_page,
 + MEMCG_NR_FILE_UNSTABLE_NFS);
   inc_zone_page_state(req-wb_page, NR_UNSTABLE_NFS);

Here, if the page is locked (by lock_page()), it will never be uncharged.
Then, _locked() version stat accounting can be used.


   inc_bdi_stat(req-wb_page-mapping-backing_dev_info, BDI_RECLAIMABLE);
   __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 @@ -451,6 +453,8 @@ nfs_clear_request_commit(struct nfs_page *req)
   struct page *page = req-wb_page;
  
   if (test_and_clear_bit(PG_CLEAN, (req)-wb_flags)) {
 + mem_cgroup_dec_page_stat_unlocked(page,
 + MEMCG_NR_FILE_UNSTABLE_NFS);
ditto.


   dec_zone_page_state(page, NR_UNSTABLE_NFS);
   dec_bdi_stat(page-mapping-backing_dev_info, BDI_RECLAIMABLE);
   return 1;
 @@ -1277,6 +1281,8 @@ nfs_commit_list(struct inode *inode, struct list_head 
 *head, int how)
   req = nfs_list_entry(head-next);
   nfs_list_remove_request(req);
   nfs_mark_request_commit(req);
 + mem_cgroup_dec_page_stat_unlocked(req-wb_page,
 + MEMCG_NR_FILE_UNSTABLE_NFS);

ditto.

   dec_zone_page_state(req-wb_page, NR_UNSTABLE_NFS);
   dec_bdi_stat(req-wb_page-mapping-backing_dev_info,
   BDI_RECLAIMABLE);
 diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
 index ada2f1b..fb79558 100644
 --- a/fs/nilfs2/segment.c
 +++ b/fs/nilfs2/segment.c
 @@ -24,6 +24,7 @@
  #include linux/pagemap.h
  #include linux/buffer_head.h
  #include linux/writeback.h
 +#include linux/memcontrol.h
  #include linux/bio.h
  #include linux/completion.h
  #include linux/blkdev.h
 @@ -1660,8 +1661,11 @@ nilfs_copy_replace_page_buffers(struct page *page, 
 struct list_head *out)
   } while (bh = bh-b_this_page, bh2 = bh2-b_this_page, bh != head);
   kunmap_atomic(kaddr, KM_USER0);
  
 - if (!TestSetPageWriteback(clone_page))
 + if (!TestSetPageWriteback(clone_page)) {
 + mem_cgroup_inc_page_stat_unlocked(clone_page,
 + MEMCG_NR_FILE_WRITEBACK);
   inc_zone_page_state(clone_page, NR_WRITEBACK);
 + }
   unlock_page(clone_page);
  
IIUC, this clone_page is not under memcg, too. Then, it can't be handled. (now)




   return 0;
 @@ -1783,8 +1787,11 @@ static void __nilfs_end_page_io(struct page *page, int 
 err)
   }
  
   if (buffer_nilfs_allocated(page_buffers(page))) {
 - if 

[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-04 Thread Vivek Goyal
On Thu, Mar 04, 2010 at 11:40:15AM +0100, Andrea Righi wrote:

[..]
 diff --git a/mm/page-writeback.c b/mm/page-writeback.c
 index 5a0f8f3..c5d14ea 100644
 --- a/mm/page-writeback.c
 +++ b/mm/page-writeback.c
 @@ -137,13 +137,16 @@ static struct prop_descriptor vm_dirties;
   */
  static int calc_period_shift(void)
  {
 + struct dirty_param dirty_param;
   unsigned long dirty_total;
  
 - if (vm_dirty_bytes)
 - dirty_total = vm_dirty_bytes / PAGE_SIZE;
 + get_dirty_param(dirty_param);
 +
 + if (dirty_param.dirty_bytes)
 + dirty_total = dirty_param.dirty_bytes / PAGE_SIZE;
   else
 - dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
 - 100;
 + dirty_total = (dirty_param.dirty_ratio *
 + determine_dirtyable_memory()) / 100;
   return 2 + ilog2(dirty_total - 1);
  }
  
 @@ -408,41 +411,46 @@ static unsigned long highmem_dirtyable_memory(unsigned 
 long total)
   */
  unsigned long determine_dirtyable_memory(void)
  {
 - unsigned long x;
 -
 - x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
 + unsigned long memory;
 + s64 memcg_memory;
  
 + memory = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
   if (!vm_highmem_is_dirtyable)
 - x -= highmem_dirtyable_memory(x);
 -
 - return x + 1;   /* Ensure that we never return 0 */
 + memory -= highmem_dirtyable_memory(memory);
 + if (mem_cgroup_has_dirty_limit())
 + return memory + 1;

Should above be?
if (!mem_cgroup_has_dirty_limit())
return memory + 1;

Vivek

 + memcg_memory = mem_cgroup_page_stat(MEMCG_NR_DIRTYABLE_PAGES);
 + return min((unsigned long)memcg_memory, memory + 1);
  }
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-04 Thread Vivek Goyal
On Thu, Mar 04, 2010 at 11:40:15AM +0100, Andrea Righi wrote:

[..]
 diff --git a/mm/page-writeback.c b/mm/page-writeback.c
 index 5a0f8f3..c5d14ea 100644
 --- a/mm/page-writeback.c
 +++ b/mm/page-writeback.c
 @@ -137,13 +137,16 @@ static struct prop_descriptor vm_dirties;
   */
  static int calc_period_shift(void)
  {
 + struct dirty_param dirty_param;
   unsigned long dirty_total;
  
 - if (vm_dirty_bytes)
 - dirty_total = vm_dirty_bytes / PAGE_SIZE;
 + get_dirty_param(dirty_param);
 +
 + if (dirty_param.dirty_bytes)
 + dirty_total = dirty_param.dirty_bytes / PAGE_SIZE;
   else
 - dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
 - 100;
 + dirty_total = (dirty_param.dirty_ratio *
 + determine_dirtyable_memory()) / 100;
   return 2 + ilog2(dirty_total - 1);
  }
  

Hmm.., I have been staring at this for some time and I think something is
wrong. I don't fully understand the way floating proportions are working
but this function seems to be calculating the period over which we need
to measure the proportions. (vm_completion proportion and vm_dirties
proportions).

And we use this period (shift) when the admin updates dirty_ratio or dirty_bytes
etc. In that case we recalculate the global dirty limit and take log2 and
use that as period over which we monitor and calculate proportions.

If yes, then it should be global and not per cgroup (because all our 
accounting of bdi completion is global and not per cgroup).

PeterZ, can tell us more about it. I am just raising the flag here to be
sure.

Thanks
Vivek
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH -mmotm 4/4] memcg: dirty pages instrumentation

2010-03-04 Thread Balbir Singh
* Andrea Righi ari...@develer.com [2010-03-04 11:40:15]:

 Apply the cgroup dirty pages accounting and limiting infrastructure
 to the opportune kernel functions.
 
 Signed-off-by: Andrea Righi ari...@develer.com
 ---
  fs/fuse/file.c  |5 +++
  fs/nfs/write.c  |4 ++
  fs/nilfs2/segment.c |   11 +-
  mm/filemap.c|1 +
  mm/page-writeback.c |   91 
 ++-
  mm/rmap.c   |4 +-
  mm/truncate.c   |2 +
  7 files changed, 84 insertions(+), 34 deletions(-)
 
 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
 index a9f5e13..dbbdd53 100644
 --- a/fs/fuse/file.c
 +++ b/fs/fuse/file.c
 @@ -11,6 +11,7 @@
  #include linux/pagemap.h
  #include linux/slab.h
  #include linux/kernel.h
 +#include linux/memcontrol.h
  #include linux/sched.h
  #include linux/module.h
 
 @@ -1129,6 +1130,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, 
 struct fuse_req *req)
 
   list_del(req-writepages_entry);
   dec_bdi_stat(bdi, BDI_WRITEBACK);
 + mem_cgroup_update_stat(req-pages[0],
 + MEM_CGROUP_STAT_WRITEBACK_TEMP, -1);
   dec_zone_page_state(req-pages[0], NR_WRITEBACK_TEMP);
   bdi_writeout_inc(bdi);
   wake_up(fi-page_waitq);
 @@ -1240,6 +1243,8 @@ static int fuse_writepage_locked(struct page *page)
   req-inode = inode;
 
   inc_bdi_stat(mapping-backing_dev_info, BDI_WRITEBACK);
 + mem_cgroup_update_stat(tmp_page,
 + MEM_CGROUP_STAT_WRITEBACK_TEMP, 1);
   inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
   end_page_writeback(page);
 
 diff --git a/fs/nfs/write.c b/fs/nfs/write.c
 index b753242..7316f7a 100644
 --- a/fs/nfs/write.c
 +++ b/fs/nfs/write.c
 @@ -439,6 +439,7 @@ nfs_mark_request_commit(struct nfs_page *req)
   req-wb_index,
   NFS_PAGE_TAG_COMMIT);
   spin_unlock(inode-i_lock);
 + mem_cgroup_update_stat(req-wb_page, MEM_CGROUP_STAT_UNSTABLE_NFS, 1);
   inc_zone_page_state(req-wb_page, NR_UNSTABLE_NFS);
   inc_bdi_stat(req-wb_page-mapping-backing_dev_info, BDI_UNSTABLE);
   __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 @@ -450,6 +451,7 @@ nfs_clear_request_commit(struct nfs_page *req)
   struct page *page = req-wb_page;
 
   if (test_and_clear_bit(PG_CLEAN, (req)-wb_flags)) {
 + mem_cgroup_update_stat(page, MEM_CGROUP_STAT_UNSTABLE_NFS, -1);
   dec_zone_page_state(page, NR_UNSTABLE_NFS);
   dec_bdi_stat(page-mapping-backing_dev_info, BDI_UNSTABLE);
   return 1;
 @@ -1273,6 +1275,8 @@ nfs_commit_list(struct inode *inode, struct list_head 
 *head, int how)
   req = nfs_list_entry(head-next);
   nfs_list_remove_request(req);
   nfs_mark_request_commit(req);
 + mem_cgroup_update_stat(req-wb_page,
 + MEM_CGROUP_STAT_UNSTABLE_NFS, -1);
   dec_zone_page_state(req-wb_page, NR_UNSTABLE_NFS);
   dec_bdi_stat(req-wb_page-mapping-backing_dev_info,
   BDI_UNSTABLE);
 diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
 index ada2f1b..27a01b1 100644
 --- a/fs/nilfs2/segment.c
 +++ b/fs/nilfs2/segment.c
 @@ -24,6 +24,7 @@
  #include linux/pagemap.h
  #include linux/buffer_head.h
  #include linux/writeback.h
 +#include linux/memcontrol.h
  #include linux/bio.h
  #include linux/completion.h
  #include linux/blkdev.h
 @@ -1660,8 +1661,11 @@ nilfs_copy_replace_page_buffers(struct page *page, 
 struct list_head *out)
   } while (bh = bh-b_this_page, bh2 = bh2-b_this_page, bh != head);
   kunmap_atomic(kaddr, KM_USER0);
 
 - if (!TestSetPageWriteback(clone_page))
 + if (!TestSetPageWriteback(clone_page)) {
 + mem_cgroup_update_stat(clone_page,
 + MEM_CGROUP_STAT_WRITEBACK, 1);

I wonder if we should start implementing inc and dec to avoid passing
the +1 and -1 parameters. It should make the code easier to read.

   inc_zone_page_state(clone_page, NR_WRITEBACK);
 + }
   unlock_page(clone_page);
 
   return 0;
 @@ -1783,8 +1787,11 @@ static void __nilfs_end_page_io(struct page *page, int 
 err)
   }
 
   if (buffer_nilfs_allocated(page_buffers(page))) {
 - if (TestClearPageWriteback(page))
 + if (TestClearPageWriteback(page)) {
 + mem_cgroup_update_stat(page,
 + MEM_CGROUP_STAT_WRITEBACK, -1);
   dec_zone_page_state(page, NR_WRITEBACK);
 + }
   } else
   end_page_writeback(page);
  }
 diff --git a/mm/filemap.c b/mm/filemap.c
 index fe09e51..f85acae 100644
 --- a/mm/filemap.c
 +++ b/mm/filemap.c
 @@ -135,6 +135,7 @@ void __remove_from_page_cache(struct page *page)
* having removed the page entirely.
*/
   if (PageDirty(page)  mapping_cap_account_dirty(mapping)) {
 +