This patch introduces some of the cgroup-related code for the IO controller: a new "io" cgroup subsystem exposing per-group weight and ioprio_class control files, plus a cgroup_changed flag in struct io_context so the elevator can process a task's cgroup change asynchronously. A task may be moved to another cgroup only if it does not share its io_context (see iocg_can_attach()).
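
For illustration, this is roughly how the new interface could be exercised
from userspace once CONFIG_GROUP_IOSCHED is enabled. The mount point, group
name and weight value below are hypothetical; writes outside
[IO_WEIGHT_MIN, IO_WEIGHT_MAX] or [IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE] are
rejected with -EINVAL by the store handlers.

	# mount -t cgroup -o io none /cgroup
	# mkdir /cgroup/test1
	# echo 500 > /cgroup/test1/io.weight		(assumes 500 is in range)
	# echo 2 > /cgroup/test1/io.ioprio_class	(2 == IOPRIO_CLASS_BE)
	# cat /cgroup/test1/io.weight
	500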

Signed-off-by: Fabio Checconi <fa...@gandalf.sssup.it>
Signed-off-by: Paolo Valente <paolo.vale...@unimore.it>
Signed-off-by: Nauman Rafique <nau...@google.com>
Signed-off-by: Gui Jianfeng <guijianf...@cn.fujitsu.com>
Signed-off-by: Vivek Goyal <vgo...@redhat.com>
Acked-by: Rik van Riel <r...@redhat.com>
---
 block/blk-ioc.c               |    3 +
 block/elevator-fq.c           |  169 ++++++++++++++++++++++++++++++++++++++++-
 block/elevator-fq.h           |   14 ++++
 include/linux/cgroup_subsys.h |    6 ++
 include/linux/iocontext.h     |    5 +
 5 files changed, 196 insertions(+), 1 deletions(-)

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index d4ed600..0d56336 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -95,6 +95,9 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
                spin_lock_init(&ret->lock);
                ret->ioprio_changed = 0;
                ret->ioprio = 0;
+#ifdef CONFIG_GROUP_IOSCHED
+               ret->cgroup_changed = 0;
+#endif
                ret->last_waited = jiffies; /* doesn't matter... */
                ret->nr_batch_requests = 0; /* because this is 0 */
                ret->aic = NULL;
diff --git a/block/elevator-fq.c b/block/elevator-fq.c
index 0e3d58c..0c060a6 100644
--- a/block/elevator-fq.c
+++ b/block/elevator-fq.c
@@ -265,7 +265,7 @@ static void entity_served(struct io_entity *entity, unsigned long served,
        unsigned long charge = queue_charge;
 
        for_each_entity(entity) {
-               entity->vdisktime += elv_delta_fair(queue_charge, entity);
+               entity->vdisktime += elv_delta_fair(charge, entity);
                update_min_vdisktime(entity->st);
                /* Group charge can be different from queue charge */
                charge = group_charge;
@@ -920,6 +920,173 @@ EXPORT_SYMBOL(elv_io_group_set_async_queue);
 
 #ifdef CONFIG_GROUP_IOSCHED
 
+struct io_cgroup io_root_cgroup = {
+       .weight = IO_WEIGHT_DEFAULT,
+       .ioprio_class = IOPRIO_CLASS_BE,
+};
+
+static struct io_cgroup *cgroup_to_io_cgroup(struct cgroup *cgroup)
+{
+       return container_of(cgroup_subsys_state(cgroup, io_subsys_id),
+                           struct io_cgroup, css);
+}
+
+#define SHOW_FUNCTION(__VAR)                                           \
+static u64 io_cgroup_##__VAR##_read(struct cgroup *cgroup,             \
+                                      struct cftype *cftype)           \
+{                                                                      \
+       struct io_cgroup *iocg;                                 \
+       u64 ret;                                                        \
+                                                                       \
+       if (!cgroup_lock_live_group(cgroup))                            \
+               return -ENODEV;                                         \
+                                                                       \
+       iocg = cgroup_to_io_cgroup(cgroup);                             \
+       spin_lock_irq(&iocg->lock);                                     \
+       ret = iocg->__VAR;                                              \
+       spin_unlock_irq(&iocg->lock);                                   \
+                                                                       \
+       cgroup_unlock();                                                \
+                                                                       \
+       return ret;                                                     \
+}
+
+SHOW_FUNCTION(weight);
+SHOW_FUNCTION(ioprio_class);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__VAR, __MIN, __MAX)                            \
+static int io_cgroup_##__VAR##_write(struct cgroup *cgroup,            \
+                                       struct cftype *cftype,          \
+                                       u64 val)                        \
+{                                                                      \
+       struct io_cgroup *iocg;                                 \
+       struct io_group *iog;                                           \
+       struct hlist_node *n;                                           \
+                                                                       \
+       if (val < (__MIN) || val > (__MAX))                             \
+               return -EINVAL;                                         \
+                                                                       \
+       if (!cgroup_lock_live_group(cgroup))                            \
+               return -ENODEV;                                         \
+                                                                       \
+       iocg = cgroup_to_io_cgroup(cgroup);                             \
+                                                                       \
+       spin_lock_irq(&iocg->lock);                                     \
+       iocg->__VAR = (unsigned long)val;                               \
+       hlist_for_each_entry(iog, n, &iocg->group_data, group_node) {   \
+               iog->entity.__VAR = (unsigned long)val;         \
+               smp_wmb();                                              \
+               iog->entity.ioprio_changed = 1;                         \
+       }                                                               \
+       spin_unlock_irq(&iocg->lock);                                   \
+                                                                       \
+       cgroup_unlock();                                                \
+                                                                       \
+       return 0;                                                       \
+}
+
+STORE_FUNCTION(weight, IO_WEIGHT_MIN, IO_WEIGHT_MAX);
+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
+#undef STORE_FUNCTION
+
+struct cftype io_files[] = {
+       {
+               .name = "weight",
+               .read_u64 = io_cgroup_weight_read,
+               .write_u64 = io_cgroup_weight_write,
+       },
+       {
+               .name = "ioprio_class",
+               .read_u64 = io_cgroup_ioprio_class_read,
+               .write_u64 = io_cgroup_ioprio_class_write,
+       },
+};
+
+static int iocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+{
+       return cgroup_add_files(cgroup, subsys, io_files, ARRAY_SIZE(io_files));
+}
+
+static struct cgroup_subsys_state *iocg_create(struct cgroup_subsys *subsys,
+                                               struct cgroup *cgroup)
+{
+       struct io_cgroup *iocg;
+
+       if (cgroup->parent != NULL) {
+               iocg = kzalloc(sizeof(*iocg), GFP_KERNEL);
+               if (iocg == NULL)
+                       return ERR_PTR(-ENOMEM);
+       } else
+               iocg = &io_root_cgroup;
+
+       spin_lock_init(&iocg->lock);
+       INIT_HLIST_HEAD(&iocg->group_data);
+       iocg->weight = IO_WEIGHT_DEFAULT;
+       iocg->ioprio_class = IOPRIO_CLASS_BE;
+
+       return &iocg->css;
+}
+
+/*
+ * We cannot support shared io contexts, as we have no means to support
+ * two tasks with the same ioc in two different groups without major rework
+ * of the main cic data structures.  For now we allow a task to change
+ * its cgroup only if it's the only owner of its ioc; the drawback of this
+ * behavior is that a group containing a task that forked using CLONE_IO
+ * will not be destroyed until the tasks sharing the ioc die.
+ */
+static int iocg_can_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
+                           struct task_struct *tsk)
+{
+       struct io_context *ioc;
+       int ret = 0;
+
+       /* task_lock() is needed to avoid races with exit_io_context() */
+       task_lock(tsk);
+       ioc = tsk->io_context;
+       if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
+               /*
+                * ioc == NULL means that the task is either too young or
+                * exiting: if it still has no ioc, the ioc can't be shared;
+                * if the task is exiting the attach will fail anyway, no
+                * matter what we return here.
+                */
+               ret = -EINVAL;
+       task_unlock(tsk);
+
+       return ret;
+}
+
+static void iocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
+                        struct cgroup *prev, struct task_struct *tsk)
+{
+       struct io_context *ioc;
+
+       task_lock(tsk);
+       ioc = tsk->io_context;
+       if (ioc != NULL)
+               ioc->cgroup_changed = 1;
+       task_unlock(tsk);
+}
+
+static void iocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+{
+
+       /* Implemented in later patch */
+}
+
+struct cgroup_subsys io_subsys = {
+       .name = "io",
+       .create = iocg_create,
+       .can_attach = iocg_can_attach,
+       .attach = iocg_attach,
+       .destroy = iocg_destroy,
+       .populate = iocg_populate,
+       .subsys_id = io_subsys_id,
+       .use_id = 1,
+};
+
 static void io_free_root_group(struct elevator_queue *e)
 {
        struct io_group *iog = e->efqd->root_group;
diff --git a/block/elevator-fq.h b/block/elevator-fq.h
index 068f240..f343841 100644
--- a/block/elevator-fq.h
+++ b/block/elevator-fq.h
@@ -13,6 +13,7 @@
 
 #ifdef CONFIG_BLOCK
 #include <linux/blkdev.h>
+#include <linux/cgroup.h>
 
 #ifndef _ELV_SCHED_H
 #define _ELV_SCHED_H
@@ -98,6 +99,8 @@ struct io_group {
        struct io_entity entity;
        atomic_t ref;
        struct io_sched_data sched_data;
+       struct hlist_node group_node;
+       unsigned short iocg_id;
        /*
         * async queue for each priority case for RT and BE class.
         * Used only for cfq.
@@ -108,6 +111,17 @@ struct io_group {
        void *key;
 };
 
+struct io_cgroup {
+       struct cgroup_subsys_state css;
+
+       unsigned int weight;
+       unsigned short ioprio_class;
+
+       spinlock_t lock;
+       struct hlist_head group_data;
+};
+
+
 #else /* CONFIG_GROUP_IOSCHED */
 
 struct io_group {
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c8d31b..baf544f 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -60,3 +60,9 @@ SUBSYS(net_cls)
 #endif
 
 /* */
+
+#ifdef CONFIG_GROUP_IOSCHED
+SUBSYS(io)
+#endif
+
+/* */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 4da4a75..b343594 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -73,6 +73,11 @@ struct io_context {
        unsigned short ioprio;
        unsigned short ioprio_changed;
 
+#ifdef CONFIG_GROUP_IOSCHED
+       /* If task changes the cgroup, elevator processes it asynchronously */
+       unsigned short cgroup_changed;
+#endif
+
        /*
         * For request batching
         */
-- 
1.6.0.6
