This is just an extract from the CKRM/RG memory controller patch that
Chandra sent last week; it contains the pieces necessary to connect a
new controller to the existing CKRM/RG code.

Minor changes in this part.


Signed-off-by: Patrick Le Dot <[EMAIL PROTECTED]>
---

 include/linux/mem_rc.h        |   71 +++++++
 include/linux/mem_rc_inline.h |   96 +++++++++
 init/Kconfig                  |    9
 kernel/res_group/Makefile     |    1
 kernel/res_group/memcore.c    |  415 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 592 insertions(+)

diff -Naurp a/include/linux/mem_rc.h b/include/linux/mem_rc.h
--- a/include/linux/mem_rc.h    1970-01-01 01:00:00.000000000 +0100
+++ b/include/linux/mem_rc.h    2006-10-03 09:34:21.000000000 +0200
@@ -0,0 +1,71 @@
+/* include/linux/mem_rc.h : memory control for Resource Groups
+ *
+ * Copyright (C) Jiantao Kong, IBM Corp. 2003
+ *           (C) Shailabh Nagar, IBM Corp. 2003
+ *           (C) Chandra Seetharaman, IBM Corp. 2004
+ *           (C) Patrick Le Dot <[EMAIL PROTECTED]@bull.net> 2006
+ *
+ *
+ * Memory control functions of the Resource Groups kernel API
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef _LINUX_MEM_RC_H
+#define _LINUX_MEM_RC_H
+
+#ifdef CONFIG_RES_GROUPS_MEM_RC
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/mmzone.h>
+#include <linux/res_group_rc.h>
+
+/* Per-resource-group memory accounting state; embedded res_shares is what
+ * the Resource Groups core hands back to the controller callbacks. */
+struct mem_res_group {
+       struct resource_group *rgroup;  /* the resource group i am part of... */
+       struct res_shares shares;
+       struct list_head res_list;      /* list of all res groups */
+       unsigned long flags;
+       unsigned int bit_id;            /* the group_id in bitmaps */
+       struct kref nr_users;           /* ref count */
+       atomic_t pg_inuse;              /* # of pages in use in the group */
+       int max_pg_used;                /* max of pages ever used */
+       int max_shrink_atlimit;         /* max of shrink_class ever called */
+       spinlock_t cnt_lock;            /* taken around share recalculation */
+       int pg_max_shares;              /* # of pages at the limit (max shares) */
+       int pg_min_shares;              /* # of pages under guarantee (min shares) */
+       struct list_head shrink_list;   /* list of classes that are near
+                                        * limit and need to be shrunk
+                                        */
+       atomic_t shrink_count;
+       unsigned long last_shrink;
+};
+
+extern struct res_controller mem_ctlr;
+extern struct mem_res_group *mem_root_res_group;
+extern struct list_head mem_res_group_list;
+extern spinlock_t mem_res_group_lock;
+extern int nr_mem_res_groups;
+extern unsigned int tot_lru_pages;
+extern unsigned int rgroup_guarantee;
+extern unsigned int rgroup_limit;
+extern int num_shrinks;
+extern int shrink_to;
+extern int shrink_at;
+extern int shrink_interval;
+
+extern void rg_mem_release(struct kref *);
+extern void rg_mem_add_page(struct page *page, struct mem_res_group *res);
+extern void rg_mem_remove_page(struct page *page, struct mem_res_group *res);
+extern void rg_mem_migrate_mm(struct mm_struct* mm, struct mem_res_group *old,
+                               struct mem_res_group *new);
+
+#endif /* CONFIG_RES_GROUPS_MEM_RC */
+
+#endif /* _LINUX_MEM_RC_H */
diff -Naurp a/include/linux/mem_rc_inline.h b/include/linux/mem_rc_inline.h
--- a/include/linux/mem_rc_inline.h     1970-01-01 01:00:00.000000000 +0100
+++ b/include/linux/mem_rc_inline.h     2006-10-03 09:34:21.000000000 +0200
@@ -0,0 +1,96 @@
+/* include/linux/mem_rc_inline.h : memory control for Resource Groups
+ *
+ * Copyright (C) Jiantao Kong, IBM Corp. 2003
+ *           (C) Shailabh Nagar, IBM Corp. 2003
+ *           (C) Chandra Seetharaman, IBM Corp. 2004
+ *           (C) Patrick Le Dot <[EMAIL PROTECTED]@bull.net> 2006
+ *
+ *
+ * Memory control functions of the Resource Groups kernel API
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef _LINUX_MEM_RC_INLINE_H_
+#define _LINUX_MEM_RC_INLINE_H_
+
+#include <linux/rmap.h>
+#include <linux/mmzone.h>
+#include <linux/mem_rc.h>
+
+#ifdef CONFIG_RES_GROUPS_MEM_RC
+
+/* Map a generic res_shares pointer back to its enclosing mem_res_group;
+ * returns NULL when no shares structure is given. */
+static inline struct mem_res_group *get_shares_mem_rgroup(
+                                               struct res_shares *shares)
+{
+       if (!shares)
+               return NULL;
+       return container_of(shares, struct mem_res_group, shares);
+}
+
+/* Memory resource group of the given task (may be NULL). */
+static inline struct mem_res_group *task_mem_rgroup(struct task_struct *tsk)
+{
+       struct res_shares *shares;
+
+       shares = get_controller_shares(tsk->res_group, &mem_ctlr);
+       return get_shares_mem_rgroup(shares);
+}
+
+/* Memory resource group attached to a resource_group (may be NULL). */
+static inline struct mem_res_group *get_mem_rgroup(struct resource_group *res)
+{
+       struct res_shares *shares;
+
+       shares = get_controller_shares(res, &mem_ctlr);
+       return get_shares_mem_rgroup(shares);
+}
+
+/* Hook: a page was added to the zone active list.  Charges would go to the
+ * current task's group (root group as fallback); the actual accounting
+ * call is still disabled -- placeholder for rg_mem_add_page(). */
+static inline void res_group_inc_active_list(struct page *page)
+{
+       struct mem_res_group *res = task_mem_rgroup(current)
+                                               ?: mem_root_res_group;
+       if (res == NULL)
+               return;
+       // rg_mem_add_page(page, res);
+}
+
+/* Hook: a page left the zone active list; accounting call still disabled. */
+static inline void res_group_dec_active_list(struct page *page)
+{
+       // rg_mem_remove_page(page, NULL);
+}
+
+/* Hook: a page was added to the zone inactive list.  Same placeholder
+ * pattern as res_group_inc_active_list(). */
+static inline void res_group_inc_inactive_list(struct page *page)
+{
+       struct mem_res_group *res = task_mem_rgroup(current)
+                                       ?: mem_root_res_group;
+
+       if (res == NULL)
+               return;
+       // rg_mem_add_page(page, res);
+}
+
+/* Hook: a page left the zone inactive list; accounting call still disabled. */
+static inline void res_group_dec_inactive_list(struct page *page)
+{
+       // rg_mem_remove_page(page, NULL);
+}
+
+/* Per-page initialisation hook -- intentionally empty for now. */
+static inline void res_group_page_init(struct page *page)
+{
+       
+}
+
+#else
+
+/* CONFIG_RES_GROUPS_MEM_RC disabled: no-op stubs so callers build without
+ * ifdefs.  res_group_page_init() was missing from this set, which would
+ * break config-off builds for any caller of the hook -- added. */
+static inline void *task_mem_rgroup(struct task_struct *tsk)
+{
+       return NULL;
+}
+
+static inline void res_group_inc_active_list(struct page *p)   {}
+static inline void res_group_dec_active_list(struct page *p)   {}
+static inline void res_group_inc_inactive_list(struct page *p) {}
+static inline void res_group_dec_inactive_list(struct page *p) {}
+static inline void res_group_page_init(struct page *p)         {}
+
+#endif
+#endif /* _LINUX_MEM_RC_INLINE_H_ */
diff -Naurp a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig      2006-10-03 09:35:37.000000000 +0200
+++ b/init/Kconfig      2006-10-03 09:34:21.000000000 +0200
@@ -307,6 +307,15 @@ config RES_GROUPS_NUMTASKS
 
          Say N if unsure, Y to use the feature.
 
+config RES_GROUPS_MEM_RC
+       bool "Memory Resource Controller"
+       depends on RES_GROUPS
+       default y
+       help
+         Provide the basic support for collecting physical memory usage
+         information among resource groups. Say Y if you want to know the
+         memory usage of each resource group.
+
 endmenu
 config SYSCTL
        bool "Sysctl support" if EMBEDDED
diff -Naurp a/kernel/res_group/Makefile b/kernel/res_group/Makefile
--- a/kernel/res_group/Makefile 2006-10-03 09:35:37.000000000 +0200
+++ b/kernel/res_group/Makefile 2006-10-03 09:34:21.000000000 +0200
@@ -1,3 +1,4 @@
 obj-y = res_group.o shares.o task.o
 obj-$(CONFIG_RES_GROUPS_NUMTASKS) += numtasks.o
+obj-$(CONFIG_RES_GROUPS_MEM_RC) += memcore.o
 obj-$(CONFIG_RGCS) += rgcs.o
diff -Naurp a/kernel/res_group/memcore.c b/kernel/res_group/memcore.c
--- a/kernel/res_group/memcore.c        1970-01-01 01:00:00.000000000 +0100
+++ b/kernel/res_group/memcore.c        2006-10-03 09:34:21.000000000 +0200
@@ -0,0 +1,415 @@
+/* memcore.c - Memory Resource Manager for Resource Groups
+ *
+ * Copyright (C) Jiantao Kong, IBM Corp. 2003
+ *           (C) Chandra Seetharaman, IBM Corp. 2004
+ *           (C) Valerie Clement <[EMAIL PROTECTED]> 2004
+ *           (C) Patrick Le Dot <[EMAIL PROTECTED]@bull.net> 2006
+ *
+ * Provides a Memory Resource controller for Resource Groups
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/pagevec.h>
+#include <linux/parser.h>
+#include <linux/mem_rc_inline.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/errno.h>
+
+static const char res_ctlr_name[] = "mem";
+
+/* System-wide controller state shared by the callbacks below. */
+unsigned int tot_lru_pages;    /* # of pages in the system */
+unsigned int rgroup_guarantee = 0;     /* group_bit=1 when usage < guarantee */
+unsigned int rgroup_limit = 0;         /* group_bit=1 when usage = limit */
+unsigned int rg_bitmap_shift_index = 0;        /* next free bit for bit_id */
+int nr_mem_res_groups = 0;
+struct mem_res_group *mem_root_res_group = NULL;
+
+LIST_HEAD(mem_res_group_list);
+spinlock_t mem_res_group_lock; /* protects list above */
+
+/* shrink tunables, exposed as writable module parameters further down */
+#define DEF_SHRINK_AT 90
+#define DEF_SHRINK_TO 80
+#define DEF_SHRINK_COUNT 10
+#define DEF_SHRINK_INTERVAL 10
+
+int shrink_at __read_mostly = DEF_SHRINK_AT;
+int shrink_to __read_mostly = DEF_SHRINK_TO;
+int num_shrinks __read_mostly = DEF_SHRINK_COUNT;
+int shrink_interval __read_mostly = DEF_SHRINK_INTERVAL;
+
+/* kref release callback: frees the mem_res_group once the last reference
+ * is dropped (see kref_put() in mem_free_shares_struct()). */
+void rg_mem_release(struct kref *kref)
+{
+       struct mem_res_group *group;
+
+       group = container_of(kref, struct mem_res_group, nr_users);
+       kfree(group);
+}
+
+/* Snapshot the system-wide page count (active + inactive + free over all
+ * populated zones) into tot_lru_pages.  Called once at controller init. */
+static void set_tot_pages(void)
+{
+       struct zone *zone;
+       /* zone counters are unsigned long; avoid the signed-int overflow
+        * the previous 'int' accumulator risked on large machines */
+       unsigned long pages = 0;
+
+       for_each_zone(zone) {
+               if (!populated_zone(zone))
+                       continue;
+               pages += zone->nr_active;
+               pages += zone->nr_inactive;
+               pages += zone->free_pages;
+       }
+       tot_lru_pages = pages;
+}
+
+/* Initialise a freshly allocated mem_res_group: default (unsupported)
+ * shares, empty lists, and a bitmap id derived from the global
+ * rg_bitmap_shift_index (the caller owns synchronisation of that
+ * counter). */
+static void mem_res_init_one(struct mem_res_group *mem_res)
+{
+       mem_res->shares.min_shares = SHARE_UNSUPPORTED;
+       mem_res->shares.max_shares = SHARE_UNSUPPORTED;
+       mem_res->shares.child_shares_divisor = SHARE_DEFAULT_DIVISOR;
+       mem_res->shares.unused_min_shares = SHARE_DEFAULT_DIVISOR;
+
+       mem_res->pg_max_shares = 0;
+       mem_res->pg_min_shares = 0;
+       mem_res->last_shrink = jiffies;
+
+       /* spin_lock_init() rather than assigning SPIN_LOCK_UNLOCKED:
+        * the static initialiser must not be copied into dynamically
+        * allocated objects (deprecated, and it breaks lockdep). */
+       spin_lock_init(&mem_res->cnt_lock);
+       INIT_LIST_HEAD(&mem_res->res_list);
+       INIT_LIST_HEAD(&mem_res->shrink_list);
+
+       mem_res->bit_id = (1 << rg_bitmap_shift_index);
+
+       kref_init(&mem_res->nr_users);
+}
+
+/* Controller callback: allocate and register per-group memory state.
+ * Returns the embedded res_shares, or NULL on allocation failure.
+ * All global bookkeeping (bitmap index, group list, group count) is now
+ * done under mem_res_group_lock -- the old code bumped
+ * rg_bitmap_shift_index and nr_mem_res_groups outside the lock, racing
+ * concurrent group creation. */
+static struct res_shares *mem_alloc_shares_struct(struct resource_group *rgroup)
+{
+       struct mem_res_group *res;
+
+       res = kzalloc(sizeof(struct mem_res_group), GFP_ATOMIC);
+       if (!res)
+               return NULL;
+
+       res->rgroup = rgroup;
+
+       spin_lock_irq(&mem_res_group_lock);
+       mem_res_init_one(res);          /* consumes rg_bitmap_shift_index */
+       /* NOTE(review): ids run out after BITS_PER_LONG groups -- no
+        * overflow check yet */
+       rg_bitmap_shift_index++;
+       if (is_res_group_root(res->rgroup)) {
+               res->pg_max_shares = tot_lru_pages;
+               res->pg_min_shares = tot_lru_pages;
+               mem_root_res_group = res;
+       }
+       list_add(&res->res_list, &mem_res_group_list);
+       nr_mem_res_groups++;
+       spin_unlock_irq(&mem_res_group_lock);
+
+       return &res->shares;
+}
+
+/* Scale self_shares from the parent's share space into pages:
+ * self * parent_pages / parent_divisor.  SHARE_DONT_CARE propagates
+ * unchanged; a zero divisor yields 0. */
+static int recalc_shares(int self_shares, int parent_shares, int parent_divisor)
+{
+       u64 numerator;
+
+       if ((self_shares == SHARE_DONT_CARE) ||
+                       (parent_shares == SHARE_DONT_CARE))
+               return SHARE_DONT_CARE;
+       if (parent_divisor == 0)
+               return 0;
+       numerator = (u64) self_shares * parent_shares;
+       do_div(numerator, parent_divisor);
+       /* result is at most parent_shares (a page count), so it fits in int */
+       return numerator;
+}
+
+/* Recompute res's page limits from its own shares scaled by the parent's
+ * page limits.  A share set to SHARE_UNSUPPORTED leaves the corresponding
+ * limit untouched.  Callers serialise via the cnt_locks (see
+ * mem_shares_changed() / recalc_and_propagate()). */
+static void recalc_self(struct mem_res_group *res,
+                               struct mem_res_group *parres)
+{
+       struct res_shares *par = &parres->shares;
+       struct res_shares *self = &res->shares;
+
+       if (self->max_shares != SHARE_UNSUPPORTED)
+               res->pg_max_shares = recalc_shares(self->max_shares,
+                                               parres->pg_max_shares,
+                                               par->child_shares_divisor);
+
+       if (self->min_shares != SHARE_UNSUPPORTED)
+               res->pg_min_shares = recalc_shares(self->min_shares,
+                                               parres->pg_min_shares,
+                                               par->child_shares_divisor);
+}
+/*
+ * Recalculate the min_shares and max_shares in # of pages... and propagate the
+ * same to children.
+ * Caller is responsible for protecting integrity of self_shares and
+ * parent_shares.
+ *
+ * Locking: the recursion takes res->rgroup->group_lock and then each
+ * child's cnt_lock before descending, so a chain of locks is held down
+ * the hierarchy.  NOTE(review): depth_supported is 1 in mem_ctlr, which
+ * keeps that chain shallow -- revisit if deeper trees are enabled.
+ */
+static void recalc_and_propagate(struct mem_res_group * res,
+                                       struct mem_res_group * parres)
+{
+       struct resource_group *child = NULL;
+       struct mem_res_group *childres;
+
+       if (parres)
+               recalc_self(res, parres);
+
+       /* propagate to children */
+       spin_lock(&res->rgroup->group_lock);
+       for_each_child(child, res->rgroup) {
+               childres = get_mem_rgroup(child);
+               BUG_ON(!childres);
+               spin_lock(&childres->cnt_lock);
+               recalc_and_propagate(childres, res);
+               spin_unlock(&childres->cnt_lock);
+       }
+       spin_unlock(&res->rgroup->group_lock);
+       return;
+}
+
+/* Move every page accounted to 'from' over to 'dest'.  Not implemented
+ * yet -- the sketch below notes it needs an expensive per-page walk. */
+static void res_group_migrate_all_pages(struct mem_res_group* from,
+                                               struct mem_res_group* dest)
+{
+       // expensive walk : each page of the group should be updated...
+       // for each task of the group_from
+       //      mem_move_task(task, from, dest);
+}
+
+/* Controller callback: tear down a group's memory state.  Pages are
+ * handed back to the parent (once migration is implemented), counters
+ * are zeroed, and the structure's last kref is dropped. */
+static void mem_free_shares_struct(struct res_shares *my_res)
+{
+       struct mem_res_group *res, *parres;
+
+       res = get_shares_mem_rgroup(my_res);
+       if (!res)
+               return;
+
+       if (!is_res_group_root(res->rgroup)) {
+               parres = get_mem_rgroup(res->rgroup->parent);
+               res_group_migrate_all_pages(res, parres);
+       }
+
+       /*
+        * Making it all zero as freeing of data structure could
+        * happen later.
+        */
+       res->shares.min_shares = 0;
+       res->shares.max_shares = 0;
+       res->pg_max_shares = 0;
+       res->pg_min_shares = 0;
+
+       spin_lock_irq(&mem_res_group_lock);
+       list_del_init(&res->res_list);
+       /* decrement under the same lock that guards the list, so the
+        * count cannot race other create/destroy paths */
+       nr_mem_res_groups--;
+       spin_unlock_irq(&mem_res_group_lock);
+
+       res->rgroup = NULL;
+       kref_put(&res->nr_users, rg_mem_release);
+}
+
+/* Controller callback: share values changed on this group.  Recompute
+ * its page limits and propagate down the subtree.  The parent's
+ * cnt_lock is taken before the child's, matching the order used in
+ * recalc_and_propagate().  (The old local 'par' was assigned but never
+ * read -- removed.) */
+static void mem_shares_changed(struct res_shares *my_res)
+{
+       struct mem_res_group *res, *parres;
+
+       res = get_shares_mem_rgroup(my_res);
+       if (!res)
+               return;
+
+       if (!is_res_group_root(res->rgroup))
+               parres = get_mem_rgroup(res->rgroup->parent);
+       else
+               parres = NULL;
+
+       if (parres)
+               spin_lock(&parres->cnt_lock);
+       spin_lock(&res->cnt_lock);
+
+       recalc_and_propagate(res, parres);
+
+       spin_unlock(&res->cnt_lock);
+       if (parres)
+               spin_unlock(&parres->cnt_lock);
+}
+
+/* Controller callback: format usage statistics into buf, returning the
+ * number of bytes written.  Uses scnprintf() accumulation: snprintf()
+ * returns the WOULD-BE length, so the old code could advance buf past
+ * the end of the buffer (and wrap the size_t buf_size) on truncation.
+ * Also guards the division -- tot_lru_pages is 0 until set_tot_pages()
+ * has run. */
+static ssize_t mem_show_stats(struct res_shares  *my_res,
+                                       char *buf, size_t buf_size)
+{
+       struct mem_res_group *res;
+       struct zone *zone;
+       int active = 0, inactive = 0, fr = 0;
+       ssize_t j = 0;
+       u64 temp;
+
+       res = get_shares_mem_rgroup(my_res);
+       if (!res)
+               return -EINVAL;
+
+       if (res == mem_root_res_group) {
+               for_each_zone(zone) {
+                       if (!populated_zone(zone))
+                               continue;
+                       active += zone->nr_active;
+                       inactive += zone->nr_inactive;
+                       fr += zone->free_pages;
+               }
+               j += scnprintf(buf + j, buf_size - j,
+                               "%s: System: tot_pages=%d,"
+                               " active=%d,  inactive=%d, free=%d\n",
+                               res_ctlr_name, tot_lru_pages, active,
+                               inactive, fr);
+       }
+       j += scnprintf(buf + j, buf_size - j,
+                       "%s: Current number of pages in use %d\n",
+                       res_ctlr_name, atomic_read(&res->pg_inuse));
+       temp = (u64)(res->max_pg_used * res->shares.child_shares_divisor);
+       if (tot_lru_pages)
+               do_div(temp, tot_lru_pages);
+       else
+               temp = 0;
+       j += scnprintf(buf + j, buf_size - j,
+                       "%s: Maximum of pages ever used %d (%d%%)\n",
+                       res_ctlr_name, res->max_pg_used, (int)temp);
+       j += scnprintf(buf + j, buf_size - j,
+                       "%s: Maximum of pages with guarantee %d\n",
+                       res_ctlr_name, res->pg_min_shares);
+       j += scnprintf(buf + j, buf_size - j,
+                       "%s: Maximum of pages at limit %d\n",
+                       res_ctlr_name, res->pg_max_shares);
+       j += scnprintf(buf + j, buf_size - j,
+                       "%s: Maximum of shrink ever called %d\n",
+                       res_ctlr_name, res->max_shrink_atlimit);
+
+       return j;
+}
+
+/* Controller callback: clear the group's high-water statistics.
+ * cfgstr is unused. */
+static int mem_reset_stats(struct res_shares *my_res, const char *cfgstr)
+{
+       struct mem_res_group *group = get_shares_mem_rgroup(my_res);
+
+       if (group == NULL)
+               return -EINVAL;
+
+       group->max_pg_used = 0;
+       group->max_shrink_atlimit = 0;
+       return 0;
+}
+
+/* Controller callback: a task moved between resource groups.  Resolves
+ * the old and new memory groups; the actual mm migration is still
+ * disabled (rg_mem_migrate_mm() call commented out).
+ * NOTE(review): task->mm is checked but task->active_mm is used --
+ * confirm which mm the future migration should operate on. */
+static void mem_move_task(struct task_struct *tsk,
+               struct res_shares *old, struct res_shares *new)
+{
+       struct mm_struct *mm;
+       struct task_struct *task = tsk;
+       struct mem_res_group *oldres, *newres;
+
+       /* a task with no shares of its own falls back to its parent's group */
+       oldres = get_shares_mem_rgroup(old);
+       if (!oldres)
+               oldres = get_mem_rgroup(task->real_parent->res_group);
+       BUG_ON(!oldres);
+       newres = get_shares_mem_rgroup(new);
+       if (!task->mm || (newres == oldres))
+               return;
+
+       mm = task->active_mm;
+       // rg_mem_migrate_mm(mm, oldres, newres);
+       return;
+}
+
+/* Parse an integer module parameter into *var; values below 1 are
+ * rejected and the previous value is restored. */
+static int set_mem_config_val(int *var, int old_value, const char *val,
+                               struct kernel_param *kp)
+{
+       int rc = param_set_int(val, kp);
+
+       if (rc < 0)
+               return rc;
+       if (*var >= 1)
+               return 0;
+       *var = old_value;
+       return -EINVAL;
+}
+
+/* Module-parameter hook for shrink_at (must be >= 1). */
+static int set_shrink_at(const char *val, struct kernel_param *kp)
+{
+       int prev = shrink_at;
+
+       /* helper only returns 0 or a negative errno, so pass it through */
+       return set_mem_config_val(&shrink_at, prev, val, kp);
+}
+module_param_set_call(shrink_at, int, set_shrink_at, S_IRUGO | S_IWUSR);
+
+/* Module-parameter hook for shrink_to (must be >= 1). */
+static int set_shrink_to(const char *val, struct kernel_param *kp)
+{
+       int prev = shrink_to;
+
+       return set_mem_config_val(&shrink_to, prev, val, kp);
+}
+module_param_set_call(shrink_to, int, set_shrink_to, S_IRUGO | S_IWUSR);
+
+/* Module-parameter hook for num_shrinks (must be >= 1). */
+static int set_num_shrinks(const char *val, struct kernel_param *kp)
+{
+       int prev = num_shrinks;
+
+       return set_mem_config_val(&num_shrinks, prev, val, kp);
+}
+module_param_set_call(num_shrinks, int, set_num_shrinks, S_IRUGO | S_IWUSR);
+
+/* Module-parameter hook for shrink_interval (must be >= 1). */
+static int set_shrink_interval(const char *val, struct kernel_param *kp)
+{
+       int prev = shrink_interval;
+
+       return set_mem_config_val(&shrink_interval, prev, val, kp);
+}
+module_param_set_call(shrink_interval, int, set_shrink_interval,
+                                               S_IRUGO | S_IWUSR);
+
+
+/* Controller descriptor handed to register_controller(); the Resource
+ * Groups core invokes these callbacks for group lifecycle, share
+ * changes, statistics and task migration.  init_mem_res_group() treats
+ * ctlr_id != NO_RES_ID as "already registered". */
+struct res_controller mem_ctlr = {
+       .name              = res_ctlr_name,
+       .depth_supported   = 1,
+       .ctlr_id           = NO_RES_ID,
+       .alloc_shares_struct = mem_alloc_shares_struct,
+       .free_shares_struct  = mem_free_shares_struct,
+       .shares_changed    = mem_shares_changed,
+       .show_stats        = mem_show_stats,
+       .reset_stats       = mem_reset_stats,
+       .move_task         = mem_move_task,
+};
+
+/* Register the memory controller with the Resource Groups core.  The
+ * system page count and the group-list lock are set up before
+ * registration, i.e. before the first alloc_shares callback can run. */
+int __init init_mem_res_group(void)
+{
+       if (mem_ctlr.ctlr_id != NO_RES_ID)
+               return -EBUSY; /* already registered */
+
+       set_tot_pages();
+
+       spin_lock_init(&mem_res_group_lock);
+       return register_controller(&mem_ctlr);
+}
+
+/* Unregister the controller on module unload. */
+void __exit exit_mem_res_group(void)
+{
+       unregister_controller(&mem_ctlr);
+}
+
+/* Built-in by default (bool Kconfig option); module hooks kept anyway. */
+module_init(init_mem_res_group)
+module_exit(exit_mem_res_group)
+MODULE_LICENSE("GPL");


+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+    Patrick Le Dot
 mailto: [EMAIL PROTECTED]@bull.net         Centre UNIX de BULL SAS
 Phone : +33 4 76 29 73 20               1, Rue de Provence     BP 208
 Fax   : +33 4 76 29 76 00               38130 ECHIROLLES Cedex FRANCE
 Bull, Architect of an Open World TM
 www.bull.com

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
ckrm-tech mailing list
https://lists.sourceforge.net/lists/listinfo/ckrm-tech

Reply via email to