[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
On Tue, Mar 02, 2010 at 12:04:53PM +0200, Kirill A. Shutemov wrote:
[snip]
  +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
  +{
  +       return -ENOMEM;
 
 Why ENOMEM? Probably, EINVAL or ENOSYS?

OK, ENOSYS is more appropriate IMHO.

  +static s64 mem_cgroup_get_local_page_stat(struct mem_cgroup *memcg,
  +                               enum mem_cgroup_page_stat_item item)
  +{
  +       s64 ret;
  +
  +       switch (item) {
  +       case MEMCG_NR_DIRTYABLE_PAGES:
  +               ret = res_counter_read_u64(&memcg->res, RES_LIMIT) -
  +                       res_counter_read_u64(&memcg->res, RES_USAGE);
  +               /* Translate free memory in pages */
  +               ret >>= PAGE_SHIFT;
  +               ret += mem_cgroup_read_stat(memcg, LRU_ACTIVE_FILE) +
  +                       mem_cgroup_read_stat(memcg, LRU_INACTIVE_FILE);
  +               if (mem_cgroup_can_swap(memcg))
  +                       ret += mem_cgroup_read_stat(memcg, LRU_ACTIVE_ANON) +
  +                               mem_cgroup_read_stat(memcg,
  +                                       LRU_INACTIVE_ANON);
  +               break;
  +       case MEMCG_NR_RECLAIM_PAGES:
  +               ret = mem_cgroup_read_stat(memcg, 
  MEM_CGROUP_STAT_FILE_DIRTY) +
  +                       mem_cgroup_read_stat(memcg,
  +                                       MEM_CGROUP_STAT_UNSTABLE_NFS);
  +               break;
  +       case MEMCG_NR_WRITEBACK:
  +               ret = mem_cgroup_read_stat(memcg, 
  MEM_CGROUP_STAT_WRITEBACK);
  +               break;
  +       case MEMCG_NR_DIRTY_WRITEBACK_PAGES:
  +               ret = mem_cgroup_read_stat(memcg, 
  MEM_CGROUP_STAT_WRITEBACK) +
  +                       mem_cgroup_read_stat(memcg,
  +                               MEM_CGROUP_STAT_UNSTABLE_NFS);
  +               break;
  +       default:
  +               ret = 0;
  +               WARN_ON_ONCE(1);
 
 I think it's a bug, not warning.

OK.

  +       }
  +       return ret;
  +}
  +
  +static int mem_cgroup_page_stat_cb(struct mem_cgroup *mem, void *data)
  +{
  +       struct mem_cgroup_page_stat *stat = (struct mem_cgroup_page_stat 
  *)data;
  +
  +       stat->value += mem_cgroup_get_local_page_stat(mem, stat->item);
  +       return 0;
  +}
  +
  +s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
  +{
  +       struct mem_cgroup_page_stat stat = {};
  +       struct mem_cgroup *memcg;
  +
  +       if (mem_cgroup_disabled())
  +               return -ENOMEM;
 
 EINVAL/ENOSYS?

OK.

 
  +       rcu_read_lock();
  +       memcg = mem_cgroup_from_task(current);
  +       if (memcg) {
  +               /*
  +                * Recursively evaluate page statistics against all cgroup
  +                * under hierarchy tree
  +                */
  +               stat.item = item;
  +               mem_cgroup_walk_tree(memcg, &stat, mem_cgroup_page_stat_cb);
  +       } else
  +               stat.value = -ENOMEM;
 
 ditto.

OK.

 
  +       rcu_read_unlock();
  +
  +       return stat.value;
  +}
  +
   static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
   {
         int *val = data;
  @@ -1263,14 +1418,16 @@ static void record_last_oom(struct mem_cgroup *mem)
   }
 
   /*
  - * Currently used to update mapped file statistics, but the routine can be
  - * generalized to update other statistics as well.
  + * Generalized routine to update memory cgroup statistics.
   */
  -void mem_cgroup_update_file_mapped(struct page *page, int val)
  +void mem_cgroup_update_stat(struct page *page,
  +                       enum mem_cgroup_stat_index idx, int val)
 
 EXPORT_SYMBOL_GPL(mem_cgroup_update_stat) is needed, since
 it uses by filesystems.

Agreed.

  +static int
  +mem_cgroup_dirty_write(struct cgroup *cgrp, struct cftype *cft, u64 val)
  +{
  +       struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
  +       int type = cft->private;
  +
  +       if (cgrp->parent == NULL)
  +               return -EINVAL;
  +       if (((type == MEM_CGROUP_DIRTY_RATIO) ||
  +               (type == MEM_CGROUP_DIRTY_BACKGROUND_RATIO)) && (val > 100))
 
 Too many unnecessary brackets
 
if ((type == MEM_CGROUP_DIRTY_RATIO ||
type == MEM_CGROUP_DIRTY_BACKGROUND_RATIO) && val > 100)
 

OK.

Thanks,
-Andrea
___
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

___
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel


[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Greg Thelen
Comments below.  Yet to be tested on my end, but I will test it.

On Mon, Mar 1, 2010 at 1:23 PM, Andrea Righi ari...@develer.com wrote:
 Infrastructure to account dirty pages per cgroup and add dirty limit
 interfaces in the cgroupfs:

  - Direct write-out: memory.dirty_ratio, memory.dirty_bytes

  - Background write-out: memory.dirty_background_ratio, 
 memory.dirty_background_bytes

 Signed-off-by: Andrea Righi ari...@develer.com
 ---
  include/linux/memcontrol.h |   77 ++-
  mm/memcontrol.c            |  336 
 
  2 files changed, 384 insertions(+), 29 deletions(-)

 diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
 index 1f9b119..cc88b2e 100644
 --- a/include/linux/memcontrol.h
 +++ b/include/linux/memcontrol.h
 @@ -19,12 +19,50 @@

  #ifndef _LINUX_MEMCONTROL_H
  #define _LINUX_MEMCONTROL_H
 +
 +#include <linux/writeback.h>
  #include <linux/cgroup.h>
 +
  struct mem_cgroup;
  struct page_cgroup;
  struct page;
  struct mm_struct;

 +/* Cgroup memory statistics items exported to the kernel */
 +enum mem_cgroup_page_stat_item {
 +       MEMCG_NR_DIRTYABLE_PAGES,
 +       MEMCG_NR_RECLAIM_PAGES,
 +       MEMCG_NR_WRITEBACK,
 +       MEMCG_NR_DIRTY_WRITEBACK_PAGES,
 +};
 +
 +/*
 + * Statistics for memory cgroup.
 + */
 +enum mem_cgroup_stat_index {
 +       /*
 +        * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 +        */
 +       MEM_CGROUP_STAT_CACHE,     /* # of pages charged as cache */
 +       MEM_CGROUP_STAT_RSS,       /* # of pages charged as anon rss */
 +       MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
 +       MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
 +       MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
 +       MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
 +       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 +       MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out.
 +                                       used by soft limit implementation */
 +       MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out.
 +                                       used by threshold implementation */
 +       MEM_CGROUP_STAT_FILE_DIRTY,   /* # of dirty pages in page cache */
 +       MEM_CGROUP_STAT_WRITEBACK,   /* # of pages under writeback */
 +       MEM_CGROUP_STAT_WRITEBACK_TEMP,   /* # of pages under writeback using
 +                                               temporary buffers */
 +       MEM_CGROUP_STAT_UNSTABLE_NFS,   /* # of NFS unstable pages */
 +
 +       MEM_CGROUP_STAT_NSTATS,
 +};
 +
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /*
  * All charge functions with gfp_mask should use GFP_KERNEL or
 @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup 
 *memcg,
  extern int do_swap_account;
  #endif

 +extern long mem_cgroup_dirty_ratio(void);
 +extern unsigned long mem_cgroup_dirty_bytes(void);
 +extern long mem_cgroup_dirty_background_ratio(void);
 +extern unsigned long mem_cgroup_dirty_background_bytes(void);
 +
 +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
 +
  static inline bool mem_cgroup_disabled(void)
  {
        if (mem_cgroup_subsys.disabled)
 @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void)
  }

  extern bool mem_cgroup_oom_called(struct task_struct *task);
 -void mem_cgroup_update_file_mapped(struct page *page, int val);
 +void mem_cgroup_update_stat(struct page *page,
 +                       enum mem_cgroup_stat_index idx, int val);
  unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                gfp_t gfp_mask, int nid,
                                                int zid);
 @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 
 struct task_struct *p)
  {
  }

 -static inline void mem_cgroup_update_file_mapped(struct page *page,
 -                                                       int val)
 +static inline void mem_cgroup_update_stat(struct page *page,
 +                       enum mem_cgroup_stat_index idx, int val)
  {
  }

 @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone 
 *zone, int order,
        return 0;
  }

 +static inline long mem_cgroup_dirty_ratio(void)
 +{
 +       return vm_dirty_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_bytes(void)
 +{
 +       return vm_dirty_bytes;
 +}
 +
 +static inline long mem_cgroup_dirty_background_ratio(void)
 +{
 +       return dirty_background_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_background_bytes(void)
 +{
 +       return dirty_background_bytes;
 +}
 +
 +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
 +{
 +       return -ENOMEM;
 +}
 +
  #endif /* CONFIG_CGROUP_MEM_CONT */

  #endif /* _LINUX_MEMCONTROL_H */
 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 index a443c30..e74cf66 100644
 --- a/mm/memcontrol.c
 

[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
On Tue, Mar 02, 2010 at 06:32:24PM +0530, Balbir Singh wrote:

[snip]

  +extern long mem_cgroup_dirty_ratio(void);
  +extern unsigned long mem_cgroup_dirty_bytes(void);
  +extern long mem_cgroup_dirty_background_ratio(void);
  +extern unsigned long mem_cgroup_dirty_background_bytes(void);
  +
  +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
  +
 
 Docstyle comments for each function would be appreciated

OK.

   /*
* The memory controller data structure. The memory controller controls 
  both
* page cache and RSS per cgroup. We would eventually like to provide
  @@ -205,6 +199,9 @@ struct mem_cgroup {
  
  unsigned intswappiness;
  
  +   /* control memory cgroup dirty pages */
  +   unsigned long dirty_param[MEM_CGROUP_DIRTY_NPARAMS];
  +
 
 Could you mention what protects this field, is it the reclaim_lock?

Yes, it is.

Actually, we could avoid the lock completely for dirty_param[], using a
validation routine to check for incoherencies after any read with
get_dirty_param(), and retry if the validation fails. In practice, the
same approach we're using to read global vm_dirty_ratio, vm_dirty_bytes,
etc...

Considering that those values are rarely written and read often we can
protect them in a RCU way.


 BTW, is unsigned long sufficient to represent dirty_param(s)?

Yes, I think. It's the same type used for the equivalent global values.

 
  /* set when res.limit == memsw.limit */
  boolmemsw_is_minimum;
  
  @@ -1021,6 +1018,164 @@ static unsigned int get_swappiness(struct 
  mem_cgroup *memcg)
  return swappiness;
   }
  
  +static unsigned long get_dirty_param(struct mem_cgroup *memcg,
  +   enum mem_cgroup_dirty_param idx)
  +{
  +   unsigned long ret;
  +
  +   VM_BUG_ON(idx >= MEM_CGROUP_DIRTY_NPARAMS);
  +   spin_lock(&memcg->reclaim_param_lock);
  +   ret = memcg->dirty_param[idx];
  +   spin_unlock(&memcg->reclaim_param_lock);
 
 Do we need a spinlock if we protect it using RCU? Is precise data very
 important?

See above.

  +unsigned long mem_cgroup_dirty_background_bytes(void)
  +{
  +   struct mem_cgroup *memcg;
  +   unsigned long ret = dirty_background_bytes;
  +
  +   if (mem_cgroup_disabled())
  +   return ret;
  +   rcu_read_lock();
  +   memcg = mem_cgroup_from_task(current);
  +   if (likely(memcg))
  +   ret = get_dirty_param(memcg, MEM_CGROUP_DIRTY_BACKGROUND_BYTES);
  +   rcu_read_unlock();
  +
  +   return ret;
  +}
  +
  +static inline bool mem_cgroup_can_swap(struct mem_cgroup *memcg)
  +{
  +   return do_swap_account ?
  +   res_counter_read_u64(&memcg->memsw, RES_LIMIT) :
 
 Shouldn't you do a res_counter_read_u64(...)  0 for readability?

OK.

 What happens if memcg-res, RES_LIMIT == memcg-memsw, RES_LIMIT?

OK, we should also check memcg-memsw_is_minimum.

   static struct cgroup_subsys_state * __ref
   mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
   {
  @@ -3776,8 +4031,37 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct 
  cgroup *cont)
  mem-last_scanned_child = 0;
  spin_lock_init(mem-reclaim_param_lock);
  
  -   if (parent)
  +   if (parent) {
   mem->swappiness = get_swappiness(parent);
  +
  +   spin_lock(&parent->reclaim_param_lock);
  +   copy_dirty_params(mem, parent);
  +   spin_unlock(&parent->reclaim_param_lock);
  +   } else {
  +   /*
  +* XXX: should we need a lock here? we could switch from
  +* vm_dirty_ratio to vm_dirty_bytes or vice versa but we're not
  +* reading them atomically. The same for dirty_background_ratio
  +* and dirty_background_bytes.
  +*
  +* For now, try to read them speculatively and retry if a
  +* conflict is detected.
 
 The do while loop is subtle, can we add a validate check,share it with
 the write routine and retry if validation fails?

Agreed.

 
  +*/
  +   do {
  +   mem->dirty_param[MEM_CGROUP_DIRTY_RATIO] =
  +   vm_dirty_ratio;
  +   mem->dirty_param[MEM_CGROUP_DIRTY_BYTES] =
  +   vm_dirty_bytes;
  +   } while (mem->dirty_param[MEM_CGROUP_DIRTY_RATIO] &&
  +mem->dirty_param[MEM_CGROUP_DIRTY_BYTES]);
  +   do {
  +   mem->dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_RATIO] =
  +   dirty_background_ratio;
  +   mem->dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_BYTES] =
  +   dirty_background_bytes;
  +   } while (mem->dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_RATIO] &&
  +   mem->dirty_param[MEM_CGROUP_DIRTY_BACKGROUND_BYTES]);
  +   }
  atomic_set(mem-refcnt, 1);
  mem-move_charge_at_immigrate = 0;
  mutex_init(mem-thresholds_lock);

Many thanks for reviewing,
-Andrea

[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-30 Thread Andrea Righi
On Tue, Mar 02, 2010 at 10:08:17AM -0800, Greg Thelen wrote:
 Comments below.  Yet to be tested on my end, but I will test it.
 
 On Mon, Mar 1, 2010 at 1:23 PM, Andrea Righi ari...@develer.com wrote:
  Infrastructure to account dirty pages per cgroup and add dirty limit
  interfaces in the cgroupfs:
 
   - Direct write-out: memory.dirty_ratio, memory.dirty_bytes
 
   - Background write-out: memory.dirty_background_ratio, 
  memory.dirty_background_bytes
 
  Signed-off-by: Andrea Righi ari...@develer.com
  ---
   include/linux/memcontrol.h |   77 ++-
   mm/memcontrol.c            |  336 
  
   2 files changed, 384 insertions(+), 29 deletions(-)
 
  diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
  index 1f9b119..cc88b2e 100644
  --- a/include/linux/memcontrol.h
  +++ b/include/linux/memcontrol.h
  @@ -19,12 +19,50 @@
 
   #ifndef _LINUX_MEMCONTROL_H
   #define _LINUX_MEMCONTROL_H
  +
  +#include linux/writeback.h
   #include linux/cgroup.h
  +
   struct mem_cgroup;
   struct page_cgroup;
   struct page;
   struct mm_struct;
 
  +/* Cgroup memory statistics items exported to the kernel */
  +enum mem_cgroup_page_stat_item {
  +       MEMCG_NR_DIRTYABLE_PAGES,
  +       MEMCG_NR_RECLAIM_PAGES,
  +       MEMCG_NR_WRITEBACK,
  +       MEMCG_NR_DIRTY_WRITEBACK_PAGES,
  +};
  +
  +/*
  + * Statistics for memory cgroup.
  + */
  +enum mem_cgroup_stat_index {
  +       /*
  +        * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
  +        */
  +       MEM_CGROUP_STAT_CACHE,     /* # of pages charged as cache */
  +       MEM_CGROUP_STAT_RSS,       /* # of pages charged as anon rss */
  +       MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
  +       MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
  +       MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
  +       MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use 
  */
  +       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
  +       MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out.
  +                                       used by soft limit implementation */
  +       MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out.
  +                                       used by threshold implementation */
  +       MEM_CGROUP_STAT_FILE_DIRTY,   /* # of dirty pages in page cache */
  +       MEM_CGROUP_STAT_WRITEBACK,   /* # of pages under writeback */
  +       MEM_CGROUP_STAT_WRITEBACK_TEMP,   /* # of pages under writeback 
  using
  +                                               temporary buffers */
  +       MEM_CGROUP_STAT_UNSTABLE_NFS,   /* # of NFS unstable pages */
  +
  +       MEM_CGROUP_STAT_NSTATS,
  +};
  +
   #ifdef CONFIG_CGROUP_MEM_RES_CTLR
   /*
   * All charge functions with gfp_mask should use GFP_KERNEL or
  @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct 
  mem_cgroup *memcg,
   extern int do_swap_account;
   #endif
 
  +extern long mem_cgroup_dirty_ratio(void);
  +extern unsigned long mem_cgroup_dirty_bytes(void);
  +extern long mem_cgroup_dirty_background_ratio(void);
  +extern unsigned long mem_cgroup_dirty_background_bytes(void);
  +
  +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
  +
   static inline bool mem_cgroup_disabled(void)
   {
         if (mem_cgroup_subsys.disabled)
  @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void)
   }
 
   extern bool mem_cgroup_oom_called(struct task_struct *task);
  -void mem_cgroup_update_file_mapped(struct page *page, int val);
  +void mem_cgroup_update_stat(struct page *page,
  +                       enum mem_cgroup_stat_index idx, int val);
   unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                 gfp_t gfp_mask, int nid,
                                                 int zid);
  @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 
  struct task_struct *p)
   {
   }
 
  -static inline void mem_cgroup_update_file_mapped(struct page *page,
  -                                                       int val)
  +static inline void mem_cgroup_update_stat(struct page *page,
  +                       enum mem_cgroup_stat_index idx, int val)
   {
   }
 
  @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct 
  zone *zone, int order,
         return 0;
   }
 
  +static inline long mem_cgroup_dirty_ratio(void)
  +{
  +       return vm_dirty_ratio;
  +}
  +
  +static inline unsigned long mem_cgroup_dirty_bytes(void)
  +{
  +       return vm_dirty_bytes;
  +}
  +
  +static inline long mem_cgroup_dirty_background_ratio(void)
  +{
  +       return dirty_background_ratio;
  +}
  +
  +static inline unsigned long mem_cgroup_dirty_background_bytes(void)
  +{
  +       return dirty_background_bytes;
  +}
  +
  +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
  +{
  +   

[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-02 Thread Kirill A. Shutemov
On Mon, Mar 1, 2010 at 11:23 PM, Andrea Righi ari...@develer.com wrote:
 Infrastructure to account dirty pages per cgroup and add dirty limit
 interfaces in the cgroupfs:

  - Direct write-out: memory.dirty_ratio, memory.dirty_bytes

  - Background write-out: memory.dirty_background_ratio, 
 memory.dirty_background_bytes

 Signed-off-by: Andrea Righi ari...@develer.com
 ---
  include/linux/memcontrol.h |   77 ++-
  mm/memcontrol.c            |  336 
 
  2 files changed, 384 insertions(+), 29 deletions(-)

 diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
 index 1f9b119..cc88b2e 100644
 --- a/include/linux/memcontrol.h
 +++ b/include/linux/memcontrol.h
 @@ -19,12 +19,50 @@

  #ifndef _LINUX_MEMCONTROL_H
  #define _LINUX_MEMCONTROL_H
 +
 +#include linux/writeback.h
  #include linux/cgroup.h
 +
  struct mem_cgroup;
  struct page_cgroup;
  struct page;
  struct mm_struct;

 +/* Cgroup memory statistics items exported to the kernel */
 +enum mem_cgroup_page_stat_item {
 +       MEMCG_NR_DIRTYABLE_PAGES,
 +       MEMCG_NR_RECLAIM_PAGES,
 +       MEMCG_NR_WRITEBACK,
 +       MEMCG_NR_DIRTY_WRITEBACK_PAGES,
 +};
 +
 +/*
 + * Statistics for memory cgroup.
 + */
 +enum mem_cgroup_stat_index {
 +       /*
 +        * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 +        */
 +       MEM_CGROUP_STAT_CACHE,     /* # of pages charged as cache */
 +       MEM_CGROUP_STAT_RSS,       /* # of pages charged as anon rss */
 +       MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
 +       MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
 +       MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
 +       MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
 +       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 +       MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out.
 +                                       used by soft limit implementation */
 +       MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out.
 +                                       used by threshold implementation */
 +       MEM_CGROUP_STAT_FILE_DIRTY,   /* # of dirty pages in page cache */
 +       MEM_CGROUP_STAT_WRITEBACK,   /* # of pages under writeback */
 +       MEM_CGROUP_STAT_WRITEBACK_TEMP,   /* # of pages under writeback using
 +                                               temporary buffers */
 +       MEM_CGROUP_STAT_UNSTABLE_NFS,   /* # of NFS unstable pages */
 +
 +       MEM_CGROUP_STAT_NSTATS,
 +};
 +
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /*
  * All charge functions with gfp_mask should use GFP_KERNEL or
 @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup 
 *memcg,
  extern int do_swap_account;
  #endif

 +extern long mem_cgroup_dirty_ratio(void);
 +extern unsigned long mem_cgroup_dirty_bytes(void);
 +extern long mem_cgroup_dirty_background_ratio(void);
 +extern unsigned long mem_cgroup_dirty_background_bytes(void);
 +
 +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
 +
  static inline bool mem_cgroup_disabled(void)
  {
        if (mem_cgroup_subsys.disabled)
 @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void)
  }

  extern bool mem_cgroup_oom_called(struct task_struct *task);
 -void mem_cgroup_update_file_mapped(struct page *page, int val);
 +void mem_cgroup_update_stat(struct page *page,
 +                       enum mem_cgroup_stat_index idx, int val);
  unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                gfp_t gfp_mask, int nid,
                                                int zid);
 @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 
 struct task_struct *p)
  {
  }

 -static inline void mem_cgroup_update_file_mapped(struct page *page,
 -                                                       int val)
 +static inline void mem_cgroup_update_stat(struct page *page,
 +                       enum mem_cgroup_stat_index idx, int val)
  {
  }

 @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone 
 *zone, int order,
        return 0;
  }

 +static inline long mem_cgroup_dirty_ratio(void)
 +{
 +       return vm_dirty_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_bytes(void)
 +{
 +       return vm_dirty_bytes;
 +}
 +
 +static inline long mem_cgroup_dirty_background_ratio(void)
 +{
 +       return dirty_background_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_background_bytes(void)
 +{
 +       return dirty_background_bytes;
 +}
 +
 +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
 +{
 +       return -ENOMEM;

Why ENOMEM? Probably, EINVAL or ENOSYS?

 +}
 +
  #endif /* CONFIG_CGROUP_MEM_CONT */

  #endif /* _LINUX_MEMCONTROL_H */
 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 index a443c30..e74cf66 100644
 --- a/mm/memcontrol.c
 +++ b/mm/memcontrol.c
 

[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-02 Thread Balbir Singh
* Andrea Righi ari...@develer.com [2010-03-01 22:23:39]:

 Infrastructure to account dirty pages per cgroup and add dirty limit
 interfaces in the cgroupfs:
 
  - Direct write-out: memory.dirty_ratio, memory.dirty_bytes
 
  - Background write-out: memory.dirty_background_ratio, 
 memory.dirty_background_bytes
 
 Signed-off-by: Andrea Righi ari...@develer.com

Look good, but yet to be tested from my side.


 ---
  include/linux/memcontrol.h |   77 ++-
  mm/memcontrol.c|  336 
 
  2 files changed, 384 insertions(+), 29 deletions(-)
 
 diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
 index 1f9b119..cc88b2e 100644
 --- a/include/linux/memcontrol.h
 +++ b/include/linux/memcontrol.h
 @@ -19,12 +19,50 @@
 
  #ifndef _LINUX_MEMCONTROL_H
  #define _LINUX_MEMCONTROL_H
 +
 +#include linux/writeback.h
  #include linux/cgroup.h
 +
  struct mem_cgroup;
  struct page_cgroup;
  struct page;
  struct mm_struct;
 
 +/* Cgroup memory statistics items exported to the kernel */
 +enum mem_cgroup_page_stat_item {
 + MEMCG_NR_DIRTYABLE_PAGES,
 + MEMCG_NR_RECLAIM_PAGES,
 + MEMCG_NR_WRITEBACK,
 + MEMCG_NR_DIRTY_WRITEBACK_PAGES,
 +};
 +
 +/*
 + * Statistics for memory cgroup.
 + */
 +enum mem_cgroup_stat_index {
 + /*
 +  * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 +  */
 + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
 + MEM_CGROUP_STAT_RSS,   /* # of pages charged as anon rss */
 + MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
 + MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
 + MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
 + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
 + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out.
 + used by soft limit implementation */
 + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out.
 + used by threshold implementation */
 + MEM_CGROUP_STAT_FILE_DIRTY,   /* # of dirty pages in page cache */
 + MEM_CGROUP_STAT_WRITEBACK,   /* # of pages under writeback */
 + MEM_CGROUP_STAT_WRITEBACK_TEMP,   /* # of pages under writeback using
 + temporary buffers */
 + MEM_CGROUP_STAT_UNSTABLE_NFS,   /* # of NFS unstable pages */
 +
 + MEM_CGROUP_STAT_NSTATS,
 +};
 +
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /*
   * All charge functions with gfp_mask should use GFP_KERNEL or
 @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup 
 *memcg,
  extern int do_swap_account;
  #endif
 
 +extern long mem_cgroup_dirty_ratio(void);
 +extern unsigned long mem_cgroup_dirty_bytes(void);
 +extern long mem_cgroup_dirty_background_ratio(void);
 +extern unsigned long mem_cgroup_dirty_background_bytes(void);
 +
 +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
 +

Docstyle comments for each function would be appreciated

  static inline bool mem_cgroup_disabled(void)
  {
   if (mem_cgroup_subsys.disabled)
 @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void)
  }
 
  extern bool mem_cgroup_oom_called(struct task_struct *task);
 -void mem_cgroup_update_file_mapped(struct page *page, int val);

Good to see you make generic use of this function

 +void mem_cgroup_update_stat(struct page *page,
 + enum mem_cgroup_stat_index idx, int val);
  unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
   gfp_t gfp_mask, int nid,
   int zid);
 @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 
 struct task_struct *p)
  {
  }
 
 -static inline void mem_cgroup_update_file_mapped(struct page *page,
 - int val)
 +static inline void mem_cgroup_update_stat(struct page *page,
 + enum mem_cgroup_stat_index idx, int val)
  {
  }
 
 @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone 
 *zone, int order,
   return 0;
  }
 
 +static inline long mem_cgroup_dirty_ratio(void)
 +{
 + return vm_dirty_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_bytes(void)
 +{
 + return vm_dirty_bytes;
 +}
 +
 +static inline long mem_cgroup_dirty_background_ratio(void)
 +{
 + return dirty_background_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_background_bytes(void)
 +{
 + return dirty_background_bytes;
 +}
 +
 +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
 +{
 + return -ENOMEM;
 +}
 +
  #endif /* CONFIG_CGROUP_MEM_CONT */
 
  #endif /* _LINUX_MEMCONTROL_H */
 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 index a443c30..e74cf66 

[Devel] Re: [PATCH -mmotm 2/3] memcg: dirty pages accounting and limiting infrastructure

2010-03-01 Thread KAMEZAWA Hiroyuki
On Mon,  1 Mar 2010 22:23:39 +0100
Andrea Righi ari...@develer.com wrote:

 Infrastructure to account dirty pages per cgroup and add dirty limit
 interfaces in the cgroupfs:
 
  - Direct write-out: memory.dirty_ratio, memory.dirty_bytes
 
  - Background write-out: memory.dirty_background_ratio, 
 memory.dirty_background_bytes
 
 Signed-off-by: Andrea Righi ari...@develer.com

seems nice. You can add my ack.

Acked-by: KAMEZAWA Hiroyuki kamezawa.hir...@jp.fujitsu.com

But maybe you have to wait for a while. (some amounts of series of
patches are posted...) please be patient.

BTW, No TODO anymore ?


Regards,
-Kame

 ---
  include/linux/memcontrol.h |   77 ++-
  mm/memcontrol.c|  336 
 
  2 files changed, 384 insertions(+), 29 deletions(-)
 
 diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
 index 1f9b119..cc88b2e 100644
 --- a/include/linux/memcontrol.h
 +++ b/include/linux/memcontrol.h
 @@ -19,12 +19,50 @@
  
  #ifndef _LINUX_MEMCONTROL_H
  #define _LINUX_MEMCONTROL_H
 +
 +#include linux/writeback.h
  #include linux/cgroup.h
 +
  struct mem_cgroup;
  struct page_cgroup;
  struct page;
  struct mm_struct;
  
 +/* Cgroup memory statistics items exported to the kernel */
 +enum mem_cgroup_page_stat_item {
 + MEMCG_NR_DIRTYABLE_PAGES,
 + MEMCG_NR_RECLAIM_PAGES,
 + MEMCG_NR_WRITEBACK,
 + MEMCG_NR_DIRTY_WRITEBACK_PAGES,
 +};
 +
 +/*
 + * Statistics for memory cgroup.
 + */
 +enum mem_cgroup_stat_index {
 + /*
 +  * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
 +  */
 + MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
 + MEM_CGROUP_STAT_RSS,   /* # of pages charged as anon rss */
 + MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
 + MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
 + MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
 + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
 + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 + MEM_CGROUP_STAT_SOFTLIMIT, /* decrements on each page in/out.
 + used by soft limit implementation */
 + MEM_CGROUP_STAT_THRESHOLDS, /* decrements on each page in/out.
 + used by threshold implementation */
 + MEM_CGROUP_STAT_FILE_DIRTY,   /* # of dirty pages in page cache */
 + MEM_CGROUP_STAT_WRITEBACK,   /* # of pages under writeback */
 + MEM_CGROUP_STAT_WRITEBACK_TEMP,   /* # of pages under writeback using
 + temporary buffers */
 + MEM_CGROUP_STAT_UNSTABLE_NFS,   /* # of NFS unstable pages */
 +
 + MEM_CGROUP_STAT_NSTATS,
 +};
 +
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /*
   * All charge functions with gfp_mask should use GFP_KERNEL or
 @@ -117,6 +155,13 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup 
 *memcg,
  extern int do_swap_account;
  #endif
  
 +extern long mem_cgroup_dirty_ratio(void);
 +extern unsigned long mem_cgroup_dirty_bytes(void);
 +extern long mem_cgroup_dirty_background_ratio(void);
 +extern unsigned long mem_cgroup_dirty_background_bytes(void);
 +
 +extern s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item);
 +
  static inline bool mem_cgroup_disabled(void)
  {
   if (mem_cgroup_subsys.disabled)
 @@ -125,7 +170,8 @@ static inline bool mem_cgroup_disabled(void)
  }
  
  extern bool mem_cgroup_oom_called(struct task_struct *task);
 -void mem_cgroup_update_file_mapped(struct page *page, int val);
 +void mem_cgroup_update_stat(struct page *page,
 + enum mem_cgroup_stat_index idx, int val);
  unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
   gfp_t gfp_mask, int nid,
   int zid);
 @@ -300,8 +346,8 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 
 struct task_struct *p)
  {
  }
  
 -static inline void mem_cgroup_update_file_mapped(struct page *page,
 - int val)
 +static inline void mem_cgroup_update_stat(struct page *page,
 + enum mem_cgroup_stat_index idx, int val)
  {
  }
  
 @@ -312,6 +358,31 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone 
 *zone, int order,
   return 0;
  }
  
 +static inline long mem_cgroup_dirty_ratio(void)
 +{
 + return vm_dirty_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_bytes(void)
 +{
 + return vm_dirty_bytes;
 +}
 +
 +static inline long mem_cgroup_dirty_background_ratio(void)
 +{
 + return dirty_background_ratio;
 +}
 +
 +static inline unsigned long mem_cgroup_dirty_background_bytes(void)
 +{
 + return dirty_background_bytes;
 +}
 +
 +static inline s64 mem_cgroup_page_stat(enum mem_cgroup_page_stat_item item)
 +{
 + return -ENOMEM;
 +}
 +
  #endif /* CONFIG_CGROUP_MEM_CONT */