>From c13547c5758479116b6dcf10c58d0ef4f058351e Mon Sep 17 00:00:00 2001
From: Satoshi UCHIDA <[EMAIL PROTECTED]>
Date: Fri, 7 Nov 2008 19:21:19 +0900
Subject: [PATCH][cfq-cgroups] Introduce ioprio class for top layer.

  This patch introduces an ioprio class for the cfq data control layer.
  With this patch applied, the controller can also handle the RT/IDLE
  properties among groups.

    Signed-off-by: Satoshi UCHIDA <[EMAIL PROTECTED]>

---
 block/cfq-cgroup.c          |  344 +++++++++++++++++++++++++------------------
 include/linux/cfq-iosched.h |    1 +
 2 files changed, 203 insertions(+), 142 deletions(-)

diff --git a/block/cfq-cgroup.c b/block/cfq-cgroup.c
index bb8cb6f..993a3b6 100644
--- a/block/cfq-cgroup.c
+++ b/block/cfq-cgroup.c
@@ -20,11 +20,24 @@
 
 static const int cfq_cgroup_slice = HZ / 10;
 
+/*
+ * offset from end of service tree
+ */
+#define CFQ_CGROUP_IDLE_DELAY          (HZ / 5)
+
+#define cfq_data_class_idle(cfqd)                              \
+               ((cfqd)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_data_class_rt(cfqd)                                        \
+               ((cfqd)->ioprio_class == IOPRIO_CLASS_RT)
+
+
+
 static struct cfq_ops cfq_cgroup_op;
 
 struct cfq_cgroup {
        struct cgroup_subsys_state css;
        unsigned int ioprio;
+       unsigned short ioprio_class;
 
        struct rb_root sibling_tree;
        unsigned int siblings;
@@ -161,6 +174,7 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
                cfqc = cgroup_to_cfq_cgroup(get_root_subsys(&cfq_subsys));
                cfq_cgroup_sibling_tree_add(cfqc, cfqd);
                cfqd->ioprio = cfqc->ioprio;
+               cfqd->ioprio_class = cfqc->ioprio_class;
        } else {
                struct cfq_data *__cfqd;
                __cfqd = __cfq_cgroup_init_queue(cfqd->cfqdd->queue,
@@ -168,7 +182,8 @@ static void *cfq_cgroup_init_cfq_data(struct cfq_cgroup *cfqc,
                 if (!__cfqd)
                         return NULL;
                 cfq_cgroup_sibling_tree_add(cfqc, __cfqd);
                 __cfqd->ioprio = cfqc->ioprio;
+               __cfqd->ioprio_class = cfqc->ioprio_class;
         }
 
         /* check and create cfq_data for children */
@@ -250,6 +264,7 @@ cfq_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                 return ERR_PTR(-ENOMEM);
 
         cfqc->ioprio = 3;
+       cfqc->ioprio_class = IOPRIO_CLASS_BE;
 
         cfqc->sibling_tree = RB_ROOT;
         cfqc->siblings = 0;
@@ -378,7 +393,15 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
        unsigned long rb_key;
        int left;
 
-       if (!add_front) {
+       if (cfq_data_class_idle(cfqd)) {
+               rb_key = CFQ_CGROUP_IDLE_DELAY;
+               parent = rb_last(&cfqdd->service_tree.rb);
+               if (parent && parent != &cfqd->rb_node) {
+                       __cfqd = rb_entry(parent, struct cfq_data, rb_node);
+                       rb_key += __cfqd->rb_key;
+               } else
+                       rb_key += jiffies;
+       } else if (!add_front) {
                rb_key = cfq_cgroup_slice_offset(cfqd) + jiffies;
                rb_key += cfqd->slice_resid;
                cfqd->slice_resid = 0;
@@ -400,7 +423,23 @@ static void cfq_cgroup_service_tree_add(struct cfq_data *cfqd, int add_front)
                parent = *p;
                __cfqd = rb_entry(parent, struct cfq_data, rb_node);
 
-               if (rb_key < __cfqd->rb_key)
+
+               /*
+                * sort RT cfq_data first, we always want to give
+                * preference to them. IDLE cfq_data goes to the back.
+                * after that, sort on the next service time.
+                */
+               if (cfq_data_class_rt(cfqd) > cfq_data_class_rt(__cfqd))
+                       n = &(*p)->rb_left;
+               else if (cfq_data_class_rt(cfqd) < cfq_data_class_rt(__cfqd))
+                       n = &(*p)->rb_right;
+               else if (cfq_data_class_idle(cfqd) <
+                                       cfq_data_class_idle(__cfqd))
+                       n = &(*p)->rb_left;
+               else if (cfq_data_class_idle(cfqd) >
+                                       cfq_data_class_idle(__cfqd))
+                       n = &(*p)->rb_right;
+               else if (rb_key < __cfqd->rb_key)
                        n = &(*p)->rb_left;
                else
                        n = &(*p)->rb_right;
@@ -542,6 +579,14 @@ int cfq_cgroup_dispatch_requests(struct request_queue *q, int force)
         if (cfqd)
                 dispatched = cfq_queue_dispatch_requests(cfqd, force);
 
+       /*
+        * an idle cfq_data (cfqd may be NULL) expires after 1 dispatch round.
+        */
+       if (cfqd && cfqdd->busy_data > 1 && cfq_data_class_idle(cfqd)) {
+               cfqd->slice_end = jiffies + 1;
+               cfq_cgroup_slice_expired(cfqdd, 0);
+       }
+
         return dispatched;
 }
 
@@ -699,149 +744,164 @@ param_separate(const char *master, char *valbuf, char *pathbuf,  int size)
        *pc2 = '\0';
 }
 
-static ssize_t cfq_cgroup_read(struct cgroup *cont, struct cftype *cft,
-                              struct file *file, char __user *userbuf,
-                              size_t nbytes, loff_t *ppos)
-{
-       struct cfq_cgroup *cfqc;
-       char *page;
-       ssize_t ret;
-       struct rb_node *p;
-
-       page = (char *)__get_free_page(GFP_TEMPORARY);
-       if (!page)
-               return -ENOMEM;
-
-       cgroup_lock();
-       if (cgroup_is_removed(cont)) {
-               cgroup_unlock();
-               ret = -ENODEV;
-               goto out;
-       }
-
-       cfqc = cgroup_to_cfq_cgroup(cont);
-
-       cgroup_unlock();
-
-       /* print priority */
-       ret = snprintf(page, PAGE_SIZE, "default priority: %d\n", cfqc->ioprio);
-
-       p = rb_first(&cfqc->sibling_tree);
-       while (p) {
-               struct cfq_data *__cfqd;
-
-               __cfqd = rb_entry(p, struct cfq_data, group_node);
-
-               ret += snprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",
-                              __cfqd->cfqdd->queue->kobj.parent->name,
-                              __cfqd->ioprio);
-
-               p = rb_next(p);
-       }
 
-       ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);
-
-out:
-       free_page((unsigned long)page);
-       return ret;
+#define READ_FUNCTION(__FUNC, __VAR, __DEF_MSG)                                \
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft,         \
+                       struct file *file, char __user *userbuf,        \
+                       size_t nbytes, loff_t *ppos)                    \
+{                                                                      \
+       struct cfq_cgroup *cfqc;                                        \
+       char *page;                                                     \
+       ssize_t ret;                                                    \
+       struct rb_node *p;                                              \
+                                                                       \
+       page = (char *)__get_free_page(GFP_TEMPORARY);                  \
+       if (!page)                                                      \
+               return -ENOMEM;                                         \
+                                                                       \
+       cgroup_lock();                                                  \
+       if (cgroup_is_removed(cont)) {                                  \
+               cgroup_unlock();                                        \
+               ret = -ENODEV;                                          \
+               goto out;                                               \
+       }                                                               \
+                                                                       \
+       cfqc = cgroup_to_cfq_cgroup(cont);                              \
+                                                                       \
+       cgroup_unlock();                                                \
+                                                                       \
+       /* first line: the cgroup-wide default value of __VAR */        \
+       ret = scnprintf(page, PAGE_SIZE, "default " __DEF_MSG ": %d\n", \
+                       cfqc->__VAR);                                   \
+       /* then one line per cfq_data found in the sibling tree */      \
+       p = rb_first(&cfqc->sibling_tree);                              \
+       while (p) {                                                     \
+               struct cfq_data *__cfqd;                                \
+                                                                       \
+               __cfqd = rb_entry(p, struct cfq_data, group_node);      \
+               /* scnprintf returns bytes stored: ret <= PAGE_SIZE */  \
+               ret += scnprintf(page + ret, PAGE_SIZE - ret, " %s %d\n",\
+                              __cfqd->cfqdd->queue->kobj.parent->name, \
+                              __cfqd->__VAR);                          \
+                                                                       \
+               p = rb_next(p);                                         \
+       }                                                               \
+                                                                       \
+       ret = simple_read_from_buffer(userbuf, nbytes, ppos, page, ret);\
+                                                                       \
+out:                                                                   \
+       free_page((unsigned long)page);                                 \
+       return ret;                                                     \
 }
-
-static ssize_t cfq_cgroup_write(struct cgroup *cont, struct cftype *cft,
-                               struct file *file, const char __user *userbuf,
-                               size_t nbytes, loff_t *ppos)
-{
-       struct cfq_cgroup *cfqc;
-       ssize_t ret;
-       long new_prio;
-       int err, sn;
-       char *buffer = NULL;
-       char *valbuf = NULL, *pathbuf = NULL;
-       struct rb_node *p;
-
-       cgroup_lock();
-       if (cgroup_is_removed(cont)) {
-               cgroup_unlock();
-               ret = -ENODEV;
-               goto out;
-       }
-
-       cfqc = cgroup_to_cfq_cgroup(cont);
-       cgroup_unlock();
-
-       /* set priority */
-       buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-       if (buffer == NULL)
-               return -ENOMEM;
-
-       if (copy_from_user(buffer, userbuf, nbytes)) {
-               ret = -EFAULT;
-               goto free_buf;
-       }
-       buffer[nbytes] = 0;
-
-       valbuf = kmalloc(nbytes + 1, GFP_KERNEL);
-       if (!valbuf) {
-               ret = -ENOMEM;
-               goto free_buf;
-       }
-
-       pathbuf = kmalloc(nbytes + 1, GFP_KERNEL);
-       if (!pathbuf) {
-               ret = -ENOMEM;
-               goto free_val;
-       }
-
-       param_separate(buffer, valbuf, pathbuf, nbytes);
-
-       err = strict_strtoul(valbuf, 10, &new_prio);
-       if ((err) || ((new_prio < 0) || (new_prio > CFQ_CGROUP_MAX_IOPRIO))) {
-               ret = -EINVAL;
-               goto free_path;
-       }
-
-       sn = strlen(pathbuf);
-
-       p = rb_first(&cfqc->sibling_tree);
-       while (p) {
-               struct cfq_data *__cfqd;
-               const char *namep;
-
-               __cfqd = rb_entry(p, struct cfq_data, group_node);
-               namep = __cfqd->cfqdd->queue->kobj.parent->name;
-
-               if (sn == 0) {
-                       __cfqd->ioprio = new_prio;
-               } else if ((sn == strlen(namep)) &&
-                        (strncmp(pathbuf, namep, sn) == 0)) {
-                       __cfqd->ioprio = new_prio;
-                       break;
-               }
-
-               p = rb_next(p);
-       }
-
-       if ((sn == 0) ||
-           ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0)))
-               cfqc->ioprio = new_prio;
-
-       ret = nbytes;
-
-free_path:
-       kfree(pathbuf);
-free_val:
-       kfree(valbuf);
-free_buf:
-       kfree(buffer);
-out:
-       return ret;
+READ_FUNCTION(cfq_cgroup_ioprio_read, ioprio, "priority");
+READ_FUNCTION(cfq_cgroup_ioprio_class_read, ioprio_class, "priority class");
+#undef READ_FUNCTION
+
+#define WRITE_FUNCTION(__FUNC, __VAR, MIN, MAX)                                \
+static ssize_t __FUNC(struct cgroup *cont, struct cftype *cft,         \
+                       struct file *file, const char __user *userbuf,  \
+                       size_t nbytes, loff_t *ppos)                    \
+{                                                                      \
+       struct cfq_cgroup *cfqc;                                        \
+       ssize_t ret;                                                    \
+       long new_val;                                                   \
+       int err, sn;                                                    \
+       char *buffer = NULL;                                            \
+       char *valbuf = NULL, *pathbuf = NULL;                           \
+       struct rb_node *p;                                              \
+                                                                       \
+       cgroup_lock();                                                  \
+       if (cgroup_is_removed(cont)) {                                  \
+               cgroup_unlock();                                        \
+               ret = -ENODEV;                                          \
+               goto out;                                               \
+       }                                                               \
+                                                                       \
+       cfqc = cgroup_to_cfq_cgroup(cont);                              \
+       cgroup_unlock();                                                \
+                                                                       \
+       /* copy the user string, NUL-terminate it, then split it */     \
+       buffer = kmalloc(nbytes + 1, GFP_KERNEL);                       \
+       if (buffer == NULL)                                             \
+               return -ENOMEM;                                         \
+                                                                       \
+       if (copy_from_user(buffer, userbuf, nbytes)) {                  \
+               ret = -EFAULT;                                          \
+               goto free_buf;                                          \
+       }                                                               \
+       buffer[nbytes] = 0;                                             \
+                                                                       \
+       valbuf = kmalloc(nbytes + 1, GFP_KERNEL);                       \
+       if (!valbuf) {                                                  \
+               ret = -ENOMEM;                                          \
+               goto free_buf;                                          \
+       }                                                               \
+                                                                       \
+       pathbuf = kmalloc(nbytes + 1, GFP_KERNEL);                      \
+       if (!pathbuf) {                                                 \
+               ret = -ENOMEM;                                          \
+               goto free_val;                                          \
+       }                                                               \
+                                                                       \
+       param_separate(buffer, valbuf, pathbuf, nbytes);                \
+       /* new_val is a signed long, so parse with strict_strtol() */   \
+       err = strict_strtol(valbuf, 10, &new_val);                      \
+       if ((err) || ((new_val < (MIN)) || (new_val > (MAX)))) {        \
+               ret = -EINVAL;                                          \
+               goto free_path;                                         \
+       }                                                               \
+                                                                       \
+       sn = strlen(pathbuf);                                           \
+                                                                       \
+       p = rb_first(&cfqc->sibling_tree);                              \
+       while (p) {                                                     \
+               struct cfq_data *__cfqd;                                \
+               const char *namep;                                      \
+                                                                       \
+               __cfqd = rb_entry(p, struct cfq_data, group_node);      \
+               namep = __cfqd->cfqdd->queue->kobj.parent->name;        \
+               /* empty path: set every entry; else only the match */  \
+               if (sn == 0) {                                          \
+                       __cfqd->__VAR = new_val;                        \
+               } else if ((sn == strlen(namep)) &&                     \
+                        (strncmp(pathbuf, namep, sn) == 0)) {          \
+                       __cfqd->__VAR = new_val;                        \
+                       break;                                          \
+               }                                                       \
+                                                                       \
+               p = rb_next(p);                                         \
+       }                                                               \
+       /* empty path or "default" also updates the cgroup default */   \
+       if ((sn == 0) ||                                                \
+           ((sn == 7) && (strncmp(pathbuf, "default", 7) == 0)))       \
+               cfqc->__VAR = new_val;                                  \
+                                                                       \
+       ret = nbytes;                                                   \
+                                                                       \
+free_path:                                                             \
+       kfree(pathbuf);                                                 \
+free_val:                                                              \
+       kfree(valbuf);                                                  \
+free_buf:                                                              \
+       kfree(buffer);                                                  \
+out:                                                                   \
+       return ret;                                                     \
 }
+WRITE_FUNCTION(cfq_cgroup_ioprio_write, ioprio, 0, CFQ_CGROUP_MAX_IOPRIO);
+WRITE_FUNCTION(cfq_cgroup_ioprio_class_write, ioprio_class, 0,
+                                                       IOPRIO_CLASS_IDLE);
+#undef WRITE_FUNCTION
+
+#define CFQ_CGROUP_CTYPE_ATTR(_name)                   \
+       {                                               \
+               .name = (__stringify(_name)),           \
+               .read = cfq_cgroup_##_name##_read,      \
+               .write = cfq_cgroup_##_name##_write,    \
+       }
 
 static struct cftype files[] = {
-       {
-               .name = "ioprio",
-               .read = cfq_cgroup_read,
-               .write = cfq_cgroup_write,
-       },
+       CFQ_CGROUP_CTYPE_ATTR(ioprio),
+       CFQ_CGROUP_CTYPE_ATTR(ioprio_class),
 };
 
 static int cfq_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --git a/include/linux/cfq-iosched.h b/include/linux/cfq-iosched.h
index 920bcb5..ca04ebd 100644
--- a/include/linux/cfq-iosched.h
+++ b/include/linux/cfq-iosched.h
@@ -102,6 +102,7 @@ struct cfq_data {
 
 #ifdef CONFIG_IOSCHED_CFQ_CGROUP
        unsigned int ioprio;
+       unsigned short ioprio_class;
 
        /* sibling_tree member for cfq_meta_data */
        struct rb_node sib_node;
-- 
1.5.6.5


_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/virtualization

Reply via email to