Introduce attributes and functions in res_counter to implement throttling-based
cgroup subsystems.

The following attributes have been added to struct res_counter:
 * @policy:     the limiting policy / algorithm
 * @capacity:   the maximum capacity of the resource
 * @timestamp:  timestamp of the last accounted resource request

Currently the available policies are token-bucket and leaky-bucket; the
attribute @capacity is only used by the token-bucket policy (to represent the
bucket size).

The following function has been implemented to return the amount of time (in
jiffies) a cgroup should sleep to remain within the defined resource limits.

  unsigned long long
  res_counter_ratelimit_sleep(struct res_counter *res, ssize_t val);

[ Note: only the interfaces needed by the cgroup IO controller are implemented
right now ]

Signed-off-by: Andrea Righi <righi.and...@gmail.com>
---
 include/linux/res_counter.h |   69 +++++++++++++++++++++++++++++++----------
 kernel/res_counter.c        |   72 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+), 17 deletions(-)

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 4c5bcf6..9bed6af 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -14,30 +14,36 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/jiffies.h>
 
-/*
- * The core object. the cgroup that wishes to account for some
- * resource may include this counter into its structures and use
- * the helpers described beyond
- */
+/* The various policies that can be used for ratelimiting resources */
+#define        RATELIMIT_LEAKY_BUCKET  0
+#define        RATELIMIT_TOKEN_BUCKET  1
 
+/**
+ * struct res_counter - the core object to account cgroup resources
+ *
+ * @usage:     the current resource consumption level
+ * @max_usage: the maximal value of the usage from the counter creation
+ * @limit:     the limit that usage cannot exceed
+ * @failcnt:   the number of unsuccessful attempts to consume the resource
+ * @policy:    the limiting policy / algorithm
+ * @capacity:  the maximum capacity of the resource
+ * @timestamp: timestamp of the last accounted resource request
+ * @lock:      the lock to protect all of the above.
+ *             The routines below consider this to be IRQ-safe
+ *
+ * The cgroup that wishes to account for some resource may include this counter
+ * into its structures and use the helpers described beyond.
+ */
 struct res_counter {
-       /*
-        * the current resource consumption level
-        */
        unsigned long long usage;
-       /*
-        * the maximal value of the usage from the counter creation
-        */
        unsigned long long max_usage;
-       /*
-        * the limit that usage cannot exceed
-        */
        unsigned long long limit;
-       /*
-        * the number of unsuccessful attempts to consume the resource
-        */
        unsigned long long failcnt;
+       unsigned long long policy;
+       unsigned long long capacity;
+       unsigned long long timestamp;
        /*
         * the lock to protect all of the above.
         * the routines below consider this to be IRQ-safe
@@ -84,6 +90,9 @@ enum {
        RES_USAGE,
        RES_MAX_USAGE,
        RES_LIMIT,
+       RES_POLICY,
+       RES_TIMESTAMP,
+       RES_CAPACITY,
        RES_FAILCNT,
 };
 
@@ -130,6 +139,15 @@ static inline bool res_counter_limit_check_locked(struct 
res_counter *cnt)
        return false;
 }
 
+/**
+ * res_counter_ratelimit_delta_t - jiffies elapsed since the last timestamp
+ * @res: the counter whose @timestamp is compared with the current time
+ *
+ * Returns get_jiffies_64() minus @res->timestamp. The subtraction is carried
+ * out on signed 64-bit values and the result is handed back as unsigned.
+ * This helper takes no lock itself.
+ */
+static inline unsigned long long
+res_counter_ratelimit_delta_t(struct res_counter *res)
+{
+       long long now = (long long)get_jiffies_64();
+       long long last = (long long)res->timestamp;
+
+       return now - last;
+}
+
+unsigned long long
+res_counter_ratelimit_sleep(struct res_counter *res, ssize_t val);
+
 /*
  * Helper function to detect if the cgroup is within it's limit or
  * not. It's currently called from cgroup_rss_prepare()
@@ -163,6 +181,23 @@ static inline void res_counter_reset_failcnt(struct 
res_counter *cnt)
        spin_unlock_irqrestore(&cnt->lock, flags);
 }
 
+/**
+ * res_counter_ratelimit_set_limit - (re)configure a rate-limited counter
+ * @cnt:    the counter to configure
+ * @policy: value stored in @cnt->policy (RATELIMIT_LEAKY_BUCKET or
+ *          RATELIMIT_TOKEN_BUCKET)
+ * @limit:  value stored in @cnt->limit
+ * @max:    value stored in @cnt->capacity
+ *
+ * With @cnt->lock held, stores the new settings, zeroes @cnt->usage and
+ * sets @cnt->timestamp to the current jiffies, so accounting starts over.
+ * The arguments are not validated.
+ *
+ * Always returns 0.
+ */
+static inline int
+res_counter_ratelimit_set_limit(struct res_counter *cnt,
+                       unsigned long long policy,
+                       unsigned long long limit, unsigned long long max)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+
+       /* New configuration. */
+       cnt->policy = policy;
+       cnt->limit = limit;
+       cnt->capacity = max;
+
+       /* Forget whatever was accounted under the old configuration. */
+       cnt->usage = 0;
+       cnt->timestamp = get_jiffies_64();
+
+       spin_unlock_irqrestore(&cnt->lock, flags);
+
+       return 0;
+}
+
 static inline int res_counter_set_limit(struct res_counter *cnt,
                unsigned long long limit)
 {
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bf8e753..b62319c 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 #include <linux/parser.h>
+#include <linux/jiffies.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/res_counter.h>
@@ -20,6 +21,8 @@ void res_counter_init(struct res_counter *counter, struct 
res_counter *parent)
        spin_lock_init(&counter->lock);
        counter->limit = (unsigned long long)LLONG_MAX;
        counter->parent = parent;
+       counter->capacity = (unsigned long long)LLONG_MAX;
+       counter->timestamp = get_jiffies_64();
 }
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -99,6 +102,12 @@ res_counter_member(struct res_counter *counter, int member)
                return &counter->max_usage;
        case RES_LIMIT:
                return &counter->limit;
+       case RES_POLICY:
+               return &counter->policy;
+       case RES_TIMESTAMP:
+               return &counter->timestamp;
+       case RES_CAPACITY:
+               return &counter->capacity;
        case RES_FAILCNT:
                return &counter->failcnt;
        };
@@ -163,3 +172,66 @@ int res_counter_write(struct res_counter *counter, int 
member,
        spin_unlock_irqrestore(&counter->lock, flags);
        return 0;
 }
+
+/*
+ * ratelimit_leaky_bucket - account @val and compute the leaky-bucket delay
+ * @res: the counter; the caller in this file holds @res->lock
+ * @val: amount of the resource being requested, added to @res->usage
+ *
+ * @res->limit is used as a rate in units per second: the time needed to
+ * serve the accumulated @res->usage at that rate is compared with the time
+ * elapsed since @res->timestamp.
+ *
+ * Returns the number of jiffies still to wait, or 0 if no jiffy has elapsed
+ * yet or the usage fits in the elapsed time. In the latter case @res->usage
+ * and @res->timestamp are reset.
+ *
+ * @res->limit must be non-zero.
+ */
+static unsigned long long
+ratelimit_leaky_bucket(struct res_counter *res, ssize_t val)
+{
+       unsigned long long delta, usecs, t;
+
+       res->usage += val;
+       delta = res_counter_ratelimit_delta_t(res);
+       if (!delta)
+               return 0;
+       /*
+        * res->limit is 64-bit: div_u64() would truncate it to 32 bits (and
+        * divide by zero when the low 32 bits are all clear), so use the
+        * full 64-by-64 division.
+        */
+       usecs = div64_u64(res->usage * USEC_PER_SEC, res->limit);
+       /* usecs_to_jiffies() takes an unsigned int: saturate, don't wrap */
+       if (usecs > UINT_MAX)
+               usecs = UINT_MAX;
+       t = usecs_to_jiffies(usecs);
+       if (t > delta)
+               return t - delta;
+       /* Reset i/o statistics */
+       res->usage = 0;
+       res->timestamp = get_jiffies_64();
+       return 0;
+}
+
+/*
+ * ratelimit_token_bucket - account @val and compute the token-bucket delay
+ * @res: the counter; the caller in this file holds @res->lock
+ * @val: amount of the resource being requested, subtracted from @res->usage
+ *
+ * @res->usage holds the tokens currently in the bucket and is treated as a
+ * signed quantity: a negative value means the bucket is overdrawn. Tokens
+ * are refilled at @res->limit per second for the time elapsed since
+ * @res->timestamp, up to @res->capacity. The arithmetic is done in
+ * thousandths of a token so that millisecond granularity is kept.
+ * @res->timestamp is updated on every call.
+ *
+ * Returns the number of jiffies needed to refill the deficit, or 0 when the
+ * bucket is not overdrawn.
+ *
+ * @res->limit must be non-zero.
+ */
+static unsigned long long
+ratelimit_token_bucket(struct res_counter *res, ssize_t val)
+{
+       unsigned long long delta, msecs;
+       long long tok;
+
+       res->usage -= val;
+       delta = jiffies_to_msecs(res_counter_ratelimit_delta_t(res));
+       res->timestamp = get_jiffies_64();
+       tok = (long long)res->usage * MSEC_PER_SEC;
+       if (delta) {
+               long long max;
+
+               /*
+                * res_counter_init() leaves capacity at LLONG_MAX; scaling
+                * that by MSEC_PER_SEC would overflow into a negative
+                * ceiling, so saturate instead.
+                */
+               if (res->capacity >
+                   (unsigned long long)(LLONG_MAX / MSEC_PER_SEC))
+                       max = LLONG_MAX;
+               else
+                       max = (long long)res->capacity * MSEC_PER_SEC;
+
+               tok += delta * res->limit;
+               if (tok > max)
+                       tok = max;
+               res->usage = (unsigned long long)div_s64(tok, MSEC_PER_SEC);
+       }
+       if (tok >= 0)
+               return 0;
+       /*
+        * res->limit is 64-bit: div_u64() would truncate it to 32 bits (and
+        * divide by zero when the low 32 bits are all clear), so use the
+        * full 64-by-64 division.
+        */
+       msecs = div64_u64(-tok, res->limit);
+       /* msecs_to_jiffies() takes an unsigned int: saturate, don't wrap */
+       if (msecs > UINT_MAX)
+               msecs = UINT_MAX;
+       return msecs_to_jiffies(msecs);
+}
+
+/**
+ * res_counter_ratelimit_sleep - account a request and compute the sleep time
+ * @res: the rate-limited counter
+ * @val: amount of the resource being requested
+ *
+ * Runs the handler selected by @res->policy with @res->lock held and
+ * interrupts disabled. When @res->limit is zero nothing is accounted and 0
+ * is returned. An unknown policy triggers WARN_ON() and also yields 0.
+ *
+ * Returns the value computed by the policy handler: the number of jiffies
+ * the caller should sleep, 0 if none.
+ */
+unsigned long long
+res_counter_ratelimit_sleep(struct res_counter *res, ssize_t val)
+{
+       unsigned long long delay = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&res->lock, flags);
+       if (!res->limit)
+               goto unlock;
+
+       switch (res->policy) {
+       case RATELIMIT_LEAKY_BUCKET:
+               delay = ratelimit_leaky_bucket(res, val);
+               break;
+       case RATELIMIT_TOKEN_BUCKET:
+               delay = ratelimit_token_bucket(res, val);
+               break;
+       default:
+               WARN_ON(1);
+               break;
+       }
+unlock:
+       spin_unlock_irqrestore(&res->lock, flags);
+       return delay;
+}
-- 
1.5.6.3

_______________________________________________
Containers mailing list
contain...@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
Devel@openvz.org
https://openvz.org/mailman/listinfo/devel

Reply via email to