This patch implements hard CPU rate caps per task as a proportion of a
single CPU's capacity expressed in parts per thousand.

Notes:

1. Simplified calculation of sinbin durations to eliminate the need for a
64-bit divide.

Signed-off-by: Peter Williams <[EMAIL PROTECTED]>
 include/linux/sched.h |   22 ++++++++-
 kernel/Kconfig.caps   |   14 +++++
 kernel/sched.c        |  117 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 140 insertions(+), 13 deletions(-)

Index: MM-2.6.17-mm1/include/linux/sched.h
===================================================================
--- MM-2.6.17-mm1.orig/include/linux/sched.h    2006-06-22 10:21:08.000000000 +1000
+++ MM-2.6.17-mm1/include/linux/sched.h 2006-06-22 10:35:51.000000000 +1000
@@ -804,6 +804,10 @@ struct task_struct {
        unsigned long long avg_cpu_per_cycle, avg_cycle_length;
        unsigned int cpu_rate_cap;
        unsigned int mutexes_held;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+       unsigned int cpu_rate_hard_cap;
+       struct timer_list sinbin_timer;
+#endif
 #endif
        enum sleep_type sleep_type;
 
@@ -1021,12 +1025,28 @@ struct task_struct {
 };
 
 #ifdef CONFIG_CPU_RATE_CAPS
+int set_cpu_rate_cap_low(struct task_struct *, unsigned int, int);
+
 static inline unsigned int get_cpu_rate_cap(const struct task_struct *p)
 {
        return p->cpu_rate_cap;
 }
 
-int set_cpu_rate_cap(struct task_struct *, unsigned int);
+static inline int set_cpu_rate_cap(struct task_struct *p, unsigned int newcap)
+{
+       return set_cpu_rate_cap_low(p, newcap, 0);
+}
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+static inline unsigned int get_cpu_rate_hard_cap(const struct task_struct *p)
+{
+       return p->cpu_rate_hard_cap;
+}
+
+static inline int set_cpu_rate_hard_cap(struct task_struct *p, unsigned int newcap)
+{
+       return set_cpu_rate_cap_low(p, newcap, 1);
+}
+#endif
 #endif
 
 static inline pid_t process_group(struct task_struct *tsk)
Index: MM-2.6.17-mm1/kernel/Kconfig.caps
===================================================================
--- MM-2.6.17-mm1.orig/kernel/Kconfig.caps      2006-06-22 10:17:47.000000000 +1000
+++ MM-2.6.17-mm1/kernel/Kconfig.caps   2006-06-22 10:29:46.000000000 +1000
@@ -3,11 +3,21 @@
 #
 
 config CPU_RATE_CAPS
-       bool "Support (soft) CPU rate caps"
+       bool "Support CPU rate caps"
        default y
        ---help---
-         Say y here if you wish to be able to put a (soft) upper limit on
+         Say y here if you wish to be able to put a soft upper limit on
          the rate of CPU usage by individual tasks.  A task which has been
          allocated a soft CPU rate cap will be limited to that rate of CPU
          usage unless there is spare CPU resources available after the needs
          of uncapped tasks are met.
+
+config CPU_RATE_HARD_CAPS
+       bool "Support CPU rate hard caps"
+       depends on CPU_RATE_CAPS
+       default n
+       ---help---
+         Say y here if you wish to be able to put a hard upper limit on
+         the rate of CPU usage by individual tasks.  A task which has been
+         allocated a hard CPU rate cap will be limited to that rate of CPU
+         usage regardless of whether there is spare CPU resources available.
Index: MM-2.6.17-mm1/kernel/sched.c
===================================================================
--- MM-2.6.17-mm1.orig/kernel/sched.c   2006-06-22 10:26:24.000000000 +1000
+++ MM-2.6.17-mm1/kernel/sched.c        2006-06-22 10:32:15.000000000 +1000
@@ -203,25 +203,39 @@ static inline unsigned int task_timeslic
 #ifdef CONFIG_CPU_RATE_CAPS
 #define CPU_CAP_ONE 1000
 #define CAP_STATS_OFFSET 8
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+static void sinbin_release_fn(unsigned long arg);
+#define min_cpu_rate_cap(p) min((p)->cpu_rate_cap, (p)->cpu_rate_hard_cap)
+#else
+#define min_cpu_rate_cap(p) (p)->cpu_rate_cap
+#endif
 #define task_has_cap(p) unlikely((p)->flags & PF_HAS_CAP)
 /* this assumes that p is not a real time task */
 #define task_is_background(p) unlikely((p)->cpu_rate_cap == 0)
 #define task_being_capped(p) unlikely((p)->prio >= CAPPED_PRIO)
 #define cap_load_weight(p) \
-       (max((int)(((p)->cpu_rate_cap * SCHED_LOAD_SCALE) / CPU_CAP_ONE), 1))
+       (max((int)((min_cpu_rate_cap(p) * SCHED_LOAD_SCALE) / CPU_CAP_ONE), 1))
 #define safe_to_enforce_cap(p) \
-       (!((p)->mutexes_held || (p)->flags & (PF_FREEZE | PF_UIWAKE)))
+       (!((p)->mutexes_held || \
+          (p)->flags & (PF_FREEZE | PF_UIWAKE | PF_EXITING)))
+#define safe_to_sinbin(p) (safe_to_enforce_cap(p) && !signal_pending(p))
 
 static void init_cpu_rate_caps(task_t *p)
 {
        p->cpu_rate_cap = CPU_CAP_ONE;
        p->flags &= ~PF_HAS_CAP;
        p->mutexes_held = 0;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+       p->cpu_rate_hard_cap = CPU_CAP_ONE;
+       init_timer(&p->sinbin_timer);
+       p->sinbin_timer.function = sinbin_release_fn;
+       p->sinbin_timer.data = (unsigned long) p;
+#endif
 }
 
 static inline void set_cap_flag(task_t *p)
 {
-       if (p->cpu_rate_cap < CPU_CAP_ONE && !has_rt_policy(p))
+       if (min_cpu_rate_cap(p) < CPU_CAP_ONE && !has_rt_policy(p))
                p->flags |= PF_HAS_CAP;
        else
                p->flags &= ~PF_HAS_CAP;
@@ -229,7 +243,7 @@ static inline void set_cap_flag(task_t *
 
 static inline int task_exceeding_cap(const task_t *p)
 {
-       return (p->avg_cpu_per_cycle * CPU_CAP_ONE) > (p->avg_cycle_length * p->cpu_rate_cap);
+       return (p->avg_cpu_per_cycle * CPU_CAP_ONE) > (p->avg_cycle_length * min_cpu_rate_cap(p));
 }
 
 #ifdef CONFIG_SCHED_SMT
@@ -239,7 +253,7 @@ static inline int task_exceeding_cap(con
 static unsigned int smt_timeslice(task_t *p)
 {
        if (task_being_capped(p))
-               return (p->cpu_rate_cap * DEF_TIMESLICE) / CPU_CAP_ONE;
+               return (min_cpu_rate_cap(p) * DEF_TIMESLICE) / CPU_CAP_ONE;
 
        return task_timeslice(p);
 }
@@ -271,7 +285,7 @@ static int task_exceeding_cap_now(const 
        unsigned long long cpc = p->avg_cpu_per_cycle;
 
        delta = (now > p->timestamp) ? (now - p->timestamp) : 0;
-       rhs = (p->avg_cycle_length + delta) * p->cpu_rate_cap;
+       rhs = (p->avg_cycle_length + delta) * min_cpu_rate_cap(p);
        if (oncpu)
                cpc += delta;
 
@@ -283,6 +297,10 @@ static inline void init_cap_stats(task_t
        p->avg_cpu_per_cycle = 0;
        p->avg_cycle_length = 0;
        p->mutexes_held = 0;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+       init_timer(&p->sinbin_timer);
+       p->sinbin_timer.data = (unsigned long) p;
+#endif
 }
 
 static inline void inc_cap_stats_cycle(task_t *p, unsigned long long now)
@@ -315,6 +333,7 @@ static inline void decay_cap_stats(task_
 #define task_being_capped(p) 0
 #define cap_load_weight(p) ((int)SCHED_LOAD_SCALE)
 #define safe_to_enforce_cap(p) 0
+#define safe_to_sinbin(p) 0
 
 static inline void init_cpu_rate_caps(task_t *p)
 {
@@ -1192,6 +1211,63 @@ static void deactivate_task(struct task_
        p->array = NULL;
 }
 
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+#define task_has_hard_cap(p) unlikely((p)->cpu_rate_hard_cap < CPU_CAP_ONE)
+
+/*
+ * Release a task from the sinbin
+ */
+static void sinbin_release_fn(unsigned long arg)
+{
+       unsigned long flags;
+       struct task_struct *p = (struct task_struct*)arg;
+       struct runqueue *rq = task_rq_lock(p, &flags);
+
+       p->prio = effective_prio(p);
+
+       __activate_task(p, rq);
+
+       task_rq_unlock(rq, &flags);
+}
+
+static unsigned long reqd_sinbin_ticks(const task_t *p)
+{
+       unsigned long long lhs = p->avg_cpu_per_cycle * CPU_CAP_ONE;
+       unsigned long long rhs = p->avg_cycle_length * p->cpu_rate_hard_cap;
+
+       if (lhs > rhs) {
+               lhs -= p->avg_cpu_per_cycle;
+               lhs >>= CAP_STATS_OFFSET;
+               /* have to do two divisions because there's no guarantee
+                * that p->cpu_rate_hard_cap * (1000000000 / HZ) would
+                * not overflow a 32 bit unsigned integer
+                */
+               (void)do_div(lhs, p->cpu_rate_hard_cap);
+               (void)do_div(lhs, (1000000000 / HZ));
+
+               return lhs ? : 1;
+       }
+
+       return 0;
+}
+
+static void sinbin_task(task_t *p, unsigned long durn)
+{
+       if (durn == 0)
+               return;
+       deactivate_task(p, task_rq(p));
+       p->sinbin_timer.expires = jiffies + durn;
+       add_timer(&p->sinbin_timer);
+}
+#else
+#define task_has_hard_cap(p) 0
+#define reqd_sinbin_ticks(p) 0
+
+static inline void sinbin_task(task_t *p, unsigned long durn)
+{
+}
+#endif
+
 /*
  * resched_task - mark a task 'to be rescheduled now'.
  *
@@ -3579,6 +3655,13 @@ need_resched_nonpreemptible:
        if (task_has_cap(prev)) {
                inc_cap_stats_both(prev, now);
                decay_cap_stats(prev);
+               if (task_has_hard_cap(prev) && !prev->state &&
+                   !rt_task(prev) && safe_to_sinbin(prev)) {
+                       unsigned long sinbin_ticks = reqd_sinbin_ticks(prev);
+
+                       if (sinbin_ticks)
+                               sinbin_task(prev, sinbin_ticks);
+               }
        }
 
        cpu = smp_processor_id();
@@ -4532,9 +4615,10 @@ out_unlock:
 
 #ifdef CONFIG_CPU_RATE_CAPS
 /*
- * Require: 0 <= new_cap <= CPU_CAP_ONE
+ * Require: 0 <= new_cap <= CPU_CAP_ONE for hard == 0
+ *          1 <= new_cap <= CPU_CAP_ONE otherwise
  */
-int set_cpu_rate_cap(struct task_struct *p, unsigned int new_cap)
+int set_cpu_rate_cap_low(struct task_struct *p, unsigned int new_cap, int hard)
 {
        int is_allowed;
        unsigned long flags;
@@ -4544,13 +4628,21 @@ int set_cpu_rate_cap(struct task_struct 
 
        if (new_cap > CPU_CAP_ONE)
                return -EINVAL;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+       if (hard && new_cap < 1)
+               return -EINVAL;
+#endif
        is_allowed = capable(CAP_SYS_NICE);
        /*
         * We have to be careful, if called from /proc code,
         * the task might be in the middle of scheduling on another CPU.
         */
        rq = task_rq_lock(p, &flags);
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+       delta = new_cap - (hard ? p->cpu_rate_hard_cap : p->cpu_rate_cap);
+#else
        delta = new_cap - p->cpu_rate_cap;
+#endif
        if (!is_allowed) {
                /*
                 * Ordinary users can set/change caps on their own tasks
@@ -4566,7 +4658,12 @@ int set_cpu_rate_cap(struct task_struct 
         * set - but as expected it wont have any effect on scheduling until
         * the task becomes SCHED_NORMAL/SCHED_BATCH:
         */
-       p->cpu_rate_cap = new_cap;
+#ifdef CONFIG_CPU_RATE_HARD_CAPS
+       if (hard)
+               p->cpu_rate_hard_cap = new_cap;
+       else
+#endif
+               p->cpu_rate_cap = new_cap;
 
        if (has_rt_policy(p))
                goto out;
@@ -4590,7 +4687,7 @@ out:
        return 0;
 }
 
-EXPORT_SYMBOL(set_cpu_rate_cap);
+EXPORT_SYMBOL(set_cpu_rate_cap_low);
 #endif
 
 long sched_setaffinity(pid_t pid, cpumask_t new_mask)

-- 
Peter Williams                                   [EMAIL PROTECTED]

"Learning, n. The kind of ignorance distinguishing the studious."
 -- Ambrose Bierce

All the advantages of Linux Managed Hosting--Without the Cost and Risk!
Fully trained technicians. The highest number of Red Hat certifications in
the hosting industry. Fanatical Support. Click to learn more
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=107521&bid=248729&dat=121642
_______________________________________________
ckrm-tech mailing list
https://lists.sourceforge.net/lists/listinfo/ckrm-tech

Reply via email to