[RFC][PATCH][2/4] Add RSS accounting and control (v2)

2007-02-25 Thread Balbir Singh


Changelog

1. Be consistent, use the C style of returning 0 on success and negative
   values on failure
2. Change and document the locking used by the controller
   (I hope I got it right this time :-))
3. Remove memctlr_double_(un)lock routines
4. Comment the usage of MEMCONTROL_DONT_CHECK_LIMIT

This patch adds the basic accounting hooks to account for pages allocated
into the RSS of a process. Accounting is maintained at two levels, in
the mm_struct of each task and in the memory controller data structure
associated with each node in the container.

When the limit specified for the container is exceeded, the task is killed.
RSS accounting is consistent with the current definition of RSS in the
kernel. Shared pages are accounted into the RSS of each process as is
done in the kernel currently. The code is flexible in that it can be easily
modified to work with any definition of RSS.


Signed-off-by: <[EMAIL PROTECTED]>
---

 fs/exec.c  |4 +
 include/linux/memcontrol.h |   47 
 include/linux/sched.h  |   11 +++
 kernel/fork.c  |   10 +++
 mm/memcontrol.c|  130 ++---
 mm/memory.c|   34 ++-
 mm/rmap.c  |5 +
 mm/swapfile.c  |2 
 8 files changed, 234 insertions(+), 9 deletions(-)

diff -puN fs/exec.c~memcontrol-acct fs/exec.c
--- linux-2.6.20/fs/exec.c~memcontrol-acct  2007-02-24 19:39:29.0 +0530
+++ linux-2.6.20-balbir/fs/exec.c   2007-02-24 19:39:29.0 +0530
@@ -50,6 +50,7 @@
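 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>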
@@ -313,6 +314,9 @@ void install_arg_page(struct vm_area_str
if (unlikely(anon_vma_prepare(vma)))
goto out;
 
+   if (memcontrol_update_rss(mm, 1, MEMCONTROL_CHECK_LIMIT))
+   goto out;
+
flush_dcache_page(page);
pte = get_locked_pte(mm, address, &ptl);
if (!pte)
diff -puN include/linux/memcontrol.h~memcontrol-acct include/linux/memcontrol.h
--- linux-2.6.20/include/linux/memcontrol.h~memcontrol-acct 2007-02-24 19:39:29.0 +0530
+++ linux-2.6.20-balbir/include/linux/memcontrol.h  2007-02-24 19:39:29.0 +0530
@@ -22,12 +22,59 @@
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
 
+/*
+ * MEMCONTROL_DONT_CHECK_LIMIT is useful for the following cases
+ * 1. During fork(), since pages are shared COW, we don't enforce limits
+ *on fork
+ * 2. During zeromap_pte_range(), again we don't enforce the limit for
+ *sharing ZERO_PAGE() in this case
+ * 3. When we actually reduce the RSS, add -1 to the rss
+ * It is generally useful when we do not want to enforce limits
+ */
+enum {
+   MEMCONTROL_CHECK_LIMIT = true,
+   MEMCONTROL_DONT_CHECK_LIMIT = false,
+};
+
 #ifdef CONFIG_CONTAINER_MEMCONTROL
+
 #ifndef kB
 #define kB 1024/* One Kilo Byte */
 #endif
 
+struct res_counter {
+   atomic_long_t usage;/* The current usage of the resource being */
+   /* counted */
+   atomic_long_t limit;/* The limit on the resource   */
+};
+
+extern int memcontrol_mm_init(struct mm_struct *mm);
+extern void memcontrol_mm_free(struct mm_struct *mm);
+extern void memcontrol_mm_assign_container(struct mm_struct *mm,
+   struct task_struct *p);
+extern int memcontrol_update_rss(struct mm_struct *mm, int count, bool check);
+
 #else /* CONFIG_CONTAINER_MEMCONTROL  */
 
+static inline int memcontrol_mm_init(struct mm_struct *mm)
+{
+   return 0;
+}
+
+static inline void memcontrol_mm_free(struct mm_struct *mm)
+{
+}
+
+static inline void memcontrol_mm_assign_container(struct mm_struct *mm,
+   struct task_struct *p)
+{
+}
+
+static inline int memcontrol_update_rss(struct mm_struct *mm, int count,
+   bool check)
+{
+   return 0;
+}
+
 #endif /* CONFIG_CONTAINER_MEMCONTROL */
 #endif /* _LINUX_MEMCONTROL_H */
diff -puN include/linux/sched.h~memcontrol-acct include/linux/sched.h
--- linux-2.6.20/include/linux/sched.h~memcontrol-acct  2007-02-24 19:39:29.0 +0530
+++ linux-2.6.20-balbir/include/linux/sched.h   2007-02-24 19:39:29.0 +0530
@@ -83,6 +83,7 @@ struct sched_param {
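 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/memcontrol.h>
 
 #include <asm/processor.h>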
 
@@ -373,6 +374,16 @@ struct mm_struct {
/* aio bits */
rwlock_t        ioctx_list_lock;
struct kioctx   *ioctx_list;
+#ifdef CONFIG_CONTAINER_MEMCONTROL
+   /*
+* Each mm_struct's container, sums up in the container's counter
+* We can extend this such that, VMA's counters sum up into this
+* counter
+*/
+   struct res_counter  *counter;
+   struct container    *container;
+   rwlock_t            container_lock;
+#endif 

[RFC][PATCH][2/4] Add RSS accounting and control (v2)

2007-02-25 Thread Balbir Singh


Changelog

1. Be consistent, use the C style of returning 0 on success and negative
   values on failure
2. Change and document the locking used by the controller
   (I hope I got it right this time :-))
3. Remove memctlr_double_(un)lock routines
4. Comment the usage of MEMCONTROL_DONT_CHECK_LIMIT

This patch adds the basic accounting hooks to account for pages allocated
into the RSS of a process. Accounting is maintained at two levels, in
the mm_struct of each task and in the memory controller data structure
associated with each node in the container.

When the limit specified for the container is exceeded, the task is killed.
RSS accounting is consistent with the current definition of RSS in the
kernel. Shared pages are accounted into the RSS of each process as is
done in the kernel currently. The code is flexible in that it can be easily
modified to work with any definition of RSS.


Signed-off-by: [EMAIL PROTECTED]
---

 fs/exec.c  |4 +
 include/linux/memcontrol.h |   47 
 include/linux/sched.h  |   11 +++
 kernel/fork.c  |   10 +++
 mm/memcontrol.c|  130 ++---
 mm/memory.c|   34 ++-
 mm/rmap.c  |5 +
 mm/swapfile.c  |2 
 8 files changed, 234 insertions(+), 9 deletions(-)

diff -puN fs/exec.c~memcontrol-acct fs/exec.c
--- linux-2.6.20/fs/exec.c~memcontrol-acct  2007-02-24 19:39:29.0 +0530
+++ linux-2.6.20-balbir/fs/exec.c   2007-02-24 19:39:29.0 +0530
@@ -50,6 +50,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
+#include <linux/memcontrol.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -313,6 +314,9 @@ void install_arg_page(struct vm_area_str
if (unlikely(anon_vma_prepare(vma)))
goto out;
 
+   if (memcontrol_update_rss(mm, 1, MEMCONTROL_CHECK_LIMIT))
+   goto out;
+
flush_dcache_page(page);
pte = get_locked_pte(mm, address, &ptl);
if (!pte)
diff -puN include/linux/memcontrol.h~memcontrol-acct include/linux/memcontrol.h
--- linux-2.6.20/include/linux/memcontrol.h~memcontrol-acct 2007-02-24 19:39:29.0 +0530
+++ linux-2.6.20-balbir/include/linux/memcontrol.h  2007-02-24 19:39:29.0 +0530
@@ -22,12 +22,59 @@
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
 
+/*
+ * MEMCONTROL_DONT_CHECK_LIMIT is useful for the following cases
+ * 1. During fork(), since pages are shared COW, we don't enforce limits
+ *on fork
+ * 2. During zeromap_pte_range(), again we don't enforce the limit for
+ *sharing ZERO_PAGE() in this case
+ * 3. When we actually reduce the RSS, add -1 to the rss
+ * It is generally useful when we do not want to enforce limits
+ */
+enum {
+   MEMCONTROL_CHECK_LIMIT = true,
+   MEMCONTROL_DONT_CHECK_LIMIT = false,
+};
+
 #ifdef CONFIG_CONTAINER_MEMCONTROL
+
 #ifndef kB
 #define kB 1024/* One Kilo Byte */
 #endif
 
+struct res_counter {
+   atomic_long_t usage;/* The current usage of the resource being */
+   /* counted */
+   atomic_long_t limit;/* The limit on the resource   */
+};
+
+extern int memcontrol_mm_init(struct mm_struct *mm);
+extern void memcontrol_mm_free(struct mm_struct *mm);
+extern void memcontrol_mm_assign_container(struct mm_struct *mm,
+   struct task_struct *p);
+extern int memcontrol_update_rss(struct mm_struct *mm, int count, bool check);
+
 #else /* CONFIG_CONTAINER_MEMCONTROL  */
 
+static inline int memcontrol_mm_init(struct mm_struct *mm)
+{
+   return 0;
+}
+
+static inline void memcontrol_mm_free(struct mm_struct *mm)
+{
+}
+
+static inline void memcontrol_mm_assign_container(struct mm_struct *mm,
+   struct task_struct *p)
+{
+}
+
+static inline int memcontrol_update_rss(struct mm_struct *mm, int count,
+   bool check)
+{
+   return 0;
+}
+
 #endif /* CONFIG_CONTAINER_MEMCONTROL */
 #endif /* _LINUX_MEMCONTROL_H */
diff -puN include/linux/sched.h~memcontrol-acct include/linux/sched.h
--- linux-2.6.20/include/linux/sched.h~memcontrol-acct  2007-02-24 19:39:29.0 +0530
+++ linux-2.6.20-balbir/include/linux/sched.h   2007-02-24 19:39:29.0 +0530
@@ -83,6 +83,7 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/memcontrol.h>
 
 #include <asm/processor.h>
 
@@ -373,6 +374,16 @@ struct mm_struct {
/* aio bits */
rwlock_t        ioctx_list_lock;
struct kioctx   *ioctx_list;
+#ifdef CONFIG_CONTAINER_MEMCONTROL
+   /*
+* Each mm_struct's container, sums up in the container's counter
+* We can extend this such that, VMA's counters sum up