Author: cactus                       Date: Thu Oct  1 15:29:55 2009 GMT
Module: packages                      Tag: Titanium
---- Log message:
up to BFS 300

---- Files affected:
packages/kernel-desktop:
   kernel-desktop-sched-bfs.patch (1.1.2.10 -> 1.1.2.11) 

---- Diffs:

================================================================
Index: packages/kernel-desktop/kernel-desktop-sched-bfs.patch
diff -u packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.10 packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.11
--- packages/kernel-desktop/kernel-desktop-sched-bfs.patch:1.1.2.10     Mon Sep 28 13:15:03 2009
+++ packages/kernel-desktop/kernel-desktop-sched-bfs.patch      Thu Oct  1 17:29:50 2009
@@ -8278,3 +8278,594 @@
        k->flags |= PF_THREAD_BOUND;
  }
  EXPORT_SYMBOL(kthread_bind);
+Index: linux-2.6.31-test/kernel/sched_bfs.c
+===================================================================
+--- linux-2.6.31-test.orig/kernel/sched_bfs.c  2009-10-01 12:24:56.538334919 +1000
++++ linux-2.6.31-test/kernel/sched_bfs.c       2009-10-01 12:30:25.539335484 +1000
+@@ -147,13 +147,24 @@ int rr_interval __read_mostly = 6;
+  */
+ int sched_iso_cpu __read_mostly = 70;
+
++/*
++ * The relative length of deadline for each priority(nice) level.
++ */
+ int prio_ratios[PRIO_RANGE] __read_mostly;
+
++/*
++ * The quota handed out to tasks of all priority levels when refilling their
++ * time_slice.
++ */
+ static inline unsigned long timeslice(void)
+ {
+       return MS_TO_US(rr_interval);
+ }
+
++/*
++ * The global runqueue data that all CPUs work off. All data is protected
++ * by grq.lock.
++ */
+ struct global_rq {
+       spinlock_t lock;
+       unsigned long nr_running;
+@@ -169,11 +180,12 @@ struct global_rq {
+ #endif
+ };
+
++/* There can be only one */
+ static struct global_rq grq;
+
+ /*
+  * This is the main, per-CPU runqueue data structure.
+- * All this is protected by the global_rq lock.
++ * This data should only be modified by the local cpu.
+  */
+ struct rq {
+ #ifdef CONFIG_SMP
+@@ -204,6 +216,7 @@ struct rq {
+ #ifdef CONFIG_SMP
+       struct root_domain *rd;
+       struct sched_domain *sd;
++      unsigned long *cpu_locality; /* CPU relative cache distance */
+
+       struct list_head migration_queue;
+ #endif
+@@ -272,7 +285,6 @@ struct root_domain {
+  * members (mimicking the global state we have today).
+  */
+ static struct root_domain def_root_domain;
+-
+ #endif
+
+ static inline int cpu_of(struct rq *rq)
+@@ -308,6 +320,11 @@ static inline int cpu_of(struct rq *rq)
+ # define finish_arch_switch(prev)     do { } while (0)
+ #endif
+
++/*
++ * All common locking functions performed on grq.lock. rq->clock is local to
++ * the cpu accessing it so it can be modified just with interrupts disabled,
++ * but looking up task_rq must be done under grq.lock to be safe.
++ */
+ inline void update_rq_clock(struct rq *rq)
+ {
+       rq->clock = sched_clock_cpu(cpu_of(rq));
+@@ -321,7 +338,6 @@ static inline int task_running(struct ta
+ static inline void grq_lock(void)
+       __acquires(grq.lock)
+ {
+-      smp_mb();
+       spin_lock(&grq.lock);
+ }
+
+@@ -334,15 +350,14 @@ static inline void grq_unlock(void)
+ static inline void grq_lock_irq(void)
+       __acquires(grq.lock)
+ {
+-      smp_mb();
+       spin_lock_irq(&grq.lock);
+ }
+
+ static inline void time_lock_grq(struct rq *rq)
+       __acquires(grq.lock)
+ {
+-      grq_lock();
+       update_rq_clock(rq);
++      grq_lock();
+ }
+
+ static inline void grq_unlock_irq(void)
+@@ -354,8 +369,7 @@ static inline void grq_unlock_irq(void)
+ static inline void grq_lock_irqsave(unsigned long *flags)
+       __acquires(grq.lock)
+ {
+-      local_irq_save(*flags);
+-      grq_lock();
++      spin_lock_irqsave(&grq.lock, *flags);
+ }
+
+ static inline void grq_unlock_irqrestore(unsigned long *flags)
+@@ -491,14 +505,11 @@ static inline void finish_lock_switch(st
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
+- * A task that is queued will be on the grq run list.
++ * A task that is queued but not running will be on the grq run list.
+  * A task that is not running or queued will not be on the grq run list.
+- * A task that is currently running will have ->oncpu set and be queued
+- * temporarily in its own rq queue.
+- * A task that is running and no longer queued will be seen only on
+- * context switch exit.
++ * A task that is currently running will have ->oncpu set but not on the
++ * grq run list.
+  */
+-
+ static inline int task_queued(struct task_struct *p)
+ {
+       return (!list_empty(&p->run_list));
+@@ -618,6 +629,19 @@ static inline void resched_suitable_idle
+               wake_up_idle_cpu(first_cpu(tmp));
+ }
+
++/*
++ * The cpu cache locality difference between CPUs is used to determine how far
++ * to offset the virtual deadline. "One" difference in locality means that one
++ * timeslice difference is allowed longer for the cpu local tasks. This is
++ * enough in the common case when tasks are up to 2* number of CPUs to keep
++ * tasks within their shared cache CPUs only. See sched_init_smp for how
++ * locality is determined.
++ */
++static inline int
++cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
++{
++      return rq->cpu_locality[task_rq->cpu] * task_timeslice(p);
++}
+ #else /* CONFIG_SMP */
+ static inline void inc_qnr(void)
+ {
+@@ -649,6 +673,12 @@ static inline int suitable_idle_cpus(str
+ static inline void resched_suitable_idle(struct task_struct *p)
+ {
+ }
++
++static inline int
++cache_distance(struct rq *task_rq, struct rq *rq, struct task_struct *p)
++{
++      return 0;
++}
+ #endif /* CONFIG_SMP */
+
+ /*
+@@ -904,9 +934,13 @@ unsigned long wait_task_inactive(struct
+                * We do the initial early heuristics without holding
+                * any task-queue locks at all. We'll only try to get
+                * the runqueue lock when things look like they will
+-               * work out!
++               * work out! In the unlikely event rq is dereferenced
++               * since we're lockless, grab it again.
+                */
++retry_rq:
+               rq = task_rq(p);
++              if (unlikely(!rq))
++                      goto retry_rq;
+
+               /*
+                * If the task is actively running on another CPU
+@@ -915,9 +949,9 @@ unsigned long wait_task_inactive(struct
+                *
+                * NOTE! Since we don't hold any locks, it's not
+                * even sure that "rq" stays as the right runqueue!
+-               * But we don't care, since this will
+-               * return false if the runqueue has changed and p
+-               * is actually now running somewhere else!
++               * But we don't care, since this will return false
++               * if the runqueue has changed and p is actually now
++               * running somewhere else!
+                */
+               while (task_running(p) && p == rq->curr) {
+                       if (match_state && unlikely(p->state != match_state))
+@@ -1012,19 +1046,22 @@ EXPORT_SYMBOL_GPL(kick_process);
+
+ /*
+  * RT tasks preempt purely on priority. SCHED_NORMAL tasks preempt on the
+- * basis of earlier deadlines. SCHED_BATCH and SCHED_IDLEPRIO don't preempt,
+- * they cooperatively multitask.
++ * basis of earlier deadlines. SCHED_BATCH, ISO and IDLEPRIO don't preempt
++ * between themselves, they cooperatively multitask.
+  */
+ static inline int task_preempts_curr(struct task_struct *p, struct rq *rq)
+ {
+-      int preempts = 0;
+-
+       if (p->prio < rq->rq_prio)
+-              preempts = 1;
+-      else if (p->policy == SCHED_NORMAL && (p->prio == rq->rq_prio &&
+-               time_before(p->deadline, rq->rq_deadline)))
+-                      preempts = 1;
+-      return preempts;
++              return 1;
++      if (p->policy == SCHED_NORMAL) {
++              unsigned long p_deadline = p->deadline +
++                      cache_distance(task_rq(p), rq, p);
++
++              if ((p->prio == rq->rq_prio &&
++                  time_before(p_deadline, rq->rq_deadline)))
++                      return 1;
++      }
++      return 0;
+ }
+
+ /*
+@@ -1119,6 +1156,9 @@ static int try_to_wake_up(struct task_st
+       int success = 0;
+       struct rq *rq;
+
++      /* This barrier is undocumented, probably for p->state? Damn. */
++      smp_wmb();
++
+       /*
+        * No need to do time_lock_grq as we only need to update the rq clock
+        * if we activate the task
+@@ -1126,7 +1166,7 @@ static int try_to_wake_up(struct task_st
+       rq = task_grq_lock(p, &flags);
+
+       /* state is a volatile long, why? I don't know */
+-      if (!(unsigned int)p->state & state)
++      if (!((unsigned int)p->state & state))
+               goto out_unlock;
+
+       if (task_queued(p) || task_running(p))
+@@ -1273,7 +1313,7 @@ void wake_up_new_task(struct task_struct
+ /*
+  * Potentially available exiting-child timeslices are
+  * retrieved here - this way the parent does not get
+- * penalized for creating too many threads.
++ * penalised for creating too many threads.
+  *
+  * (this cannot be used to 'generate' timeslices
+  * artificially, because any timeslice recovered here
+@@ -1286,11 +1326,22 @@ void sched_exit(struct task_struct *p)
+       struct rq *rq;
+
+       if (p->first_time_slice) {
++              int *par_tslice, *p_tslice;
++
+               parent = p->parent;
+               rq = task_grq_lock(parent, &flags);
+-              parent->time_slice += p->time_slice;
+-              if (unlikely(parent->time_slice > timeslice()))
+-                      parent->time_slice = timeslice();
++              par_tslice = &parent->time_slice;
++              p_tslice = &p->time_slice;
++
++              /* The real time_slice of the "curr" task is on the rq var.*/
++              if (p == rq->curr)
++                      p_tslice = &rq->rq_time_slice;
++              else if (parent == task_rq(parent)->curr)
++                      par_tslice = &rq->rq_time_slice;
++
++              *par_tslice += *p_tslice;
++              if (unlikely(*par_tslice > timeslice()))
++                      *par_tslice = timeslice();
+               task_grq_unlock(&flags);
+       }
+ }
+@@ -1940,20 +1991,17 @@ void account_idle_ticks(unsigned long ti
+  * quota as real time scheduling and convert them back to SCHED_NORMAL.
+  * Where possible, the data is tested lockless, to avoid grabbing grq_lock
+  * because the occasional inaccurate result won't matter. However the
+- * data is only ever modified under lock.
++ * tick data is only ever modified under lock. iso_refractory is only simply
++ * set to 0 or 1 so it's not worth grabbing the lock yet again for that.
+  */
+ static void set_iso_refractory(void)
+ {
+-      grq_lock();
+       grq.iso_refractory = 1;
+-      grq_unlock();
+ }
+
+ static void clear_iso_refractory(void)
+ {
+-      grq_lock();
+       grq.iso_refractory = 0;
+-      grq_unlock();
+ }
+
+ /*
+@@ -2133,7 +2181,7 @@ static inline int longest_deadline(void)
+ }
+
+ /*
+- * SCHED_IDLEPRIO tasks still have a deadline set, but offset by to nice +19.
++ * SCHED_IDLEPRIO tasks still have a deadline set, but offset by nice +19.
+  * This allows nice levels to work between IDLEPRIO tasks and gives a
+  * deadline longer than nice +19 for when they're scheduled as SCHED_NORMAL
+  * tasks.
+@@ -2202,10 +2250,9 @@ retry:
+                * there is no need to initialise earliest_deadline
+                * before. Normalise all old deadlines to now.
+                */
+-              if (time_before(p->deadline, jiffies))
++              dl = p->deadline + cache_distance(task_rq(p), rq, p);
++              if (time_before(dl, jiffies))
+                       dl = jiffies;
+-              else
+-                      dl = p->deadline;
+
+               if (edt == idle ||
+                   time_before(dl, earliest_deadline)) {
+@@ -2278,6 +2325,12 @@ static inline void set_rq_task(struct rq
+       rq->rq_prio = p->prio;
+ }
+
++static void reset_rq_task(struct rq *rq, struct task_struct *p)
++{
++      rq->rq_policy = p->policy;
++      rq->rq_prio = p->prio;
++}
++
+ /*
+  * schedule() is the main scheduler function.
+  */
+@@ -2361,7 +2414,7 @@ need_resched_nonpreemptible:
+               rq->curr = next;
+               ++*switch_count;
+
+-              context_switch(rq, prev, next); /* unlocks the rq */
++              context_switch(rq, prev, next); /* unlocks the grq */
+               /*
+                * the context switch might have flipped the stack from under
+                * us, hence refresh the local variables.
+@@ -2522,7 +2575,7 @@ void __wake_up_locked_key(wait_queue_hea
+  *
+  * The sync wakeup differs that the waker knows that it will schedule
+  * away soon, so while the target thread will be woken up, it will not
+- * be migrated to another CPU - ie. the two threads are 'synchronized'
++ * be migrated to another CPU - ie. the two threads are 'synchronised'
+  * with each other. This can prevent needless bouncing between CPUs.
+  *
+  * On UP it can prevent extra preemption.
+@@ -2556,7 +2609,7 @@ EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+  *
+  * The sync wakeup differs that the waker knows that it will schedule
+  * away soon, so while the target thread will be woken up, it will not
+- * be migrated to another CPU - ie. the two threads are 'synchronized'
++ * be migrated to another CPU - ie. the two threads are 'synchronised'
+  * with each other. This can prevent needless bouncing between CPUs.
+  *
+  * On UP it can prevent extra preemption.
+@@ -2921,8 +2974,10 @@ void set_user_nice(struct task_struct *p
+       }
+
+       /* Just resched the task, schedule() will know what to do. */
+-      if (task_running(p))
++      if (task_running(p)) {
+               resched_task(p);
++              reset_rq_task(rq, p);
++      }
+ out_unlock:
+       task_grq_unlock(&flags);
+ }
+@@ -3060,8 +3115,10 @@ __setscheduler(struct task_struct *p, st
+        * Reschedule if running. schedule() will know if it can continue
+        * running or not.
+        */
+-      if (task_running(p))
++      if (task_running(p)) {
+               resched_task(p);
++              reset_rq_task(rq, p);
++      }
+ }
+
+ /*
+@@ -3824,7 +3881,7 @@ void show_state_filter(unsigned long sta
+  * NOTE: this function does not set the idle thread's NEED_RESCHED
+  * flag, to make booting more robust.
+  */
+-void __cpuinit init_idle(struct task_struct *idle, int cpu)
++void init_idle(struct task_struct *idle, int cpu)
+ {
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long flags;
+@@ -3972,7 +4029,7 @@ void wake_up_idle_cpu(int cpu)
+        * This is safe, as this function is called with the timer
+        * wheel base lock of (cpu) held. When the CPU is on the way
+        * to idle and has not yet set rq->curr to idle then it will
+-       * be serialized on the timer wheel base lock and take the new
++       * be serialised on the timer wheel base lock and take the new
+        * timer into account automatically.
+        */
+       if (unlikely(rq->curr != idle))
+@@ -4441,7 +4498,7 @@ early_initcall(migration_init);
+ #endif
+
+ /*
+- * sched_domains_mutex serializes calls to arch_init_sched_domains,
++ * sched_domains_mutex serialises calls to arch_init_sched_domains,
+  * detach_destroy_domains and partition_sched_domains.
+  */
+ static DEFINE_MUTEX(sched_domains_mutex);
+@@ -5077,7 +5134,7 @@ static void free_sched_groups(const stru
+ #endif /* CONFIG_NUMA */
+
+ /*
+- * Initialize sched groups cpu_power.
++ * Initialise sched groups cpu_power.
+  *
+  * cpu_power indicates the capacity of sched group, which is used while
+  * distributing the load between different sched groups in a sched domain.
+@@ -5129,7 +5186,7 @@ static void init_sched_groups_power(int
+ }
+
+ /*
+- * Initializers for schedule domains
++ * Initialisers for schedule domains
+  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
+  */
+
+@@ -5536,7 +5593,7 @@ static struct sched_domain_attr *dattr_c
+ static cpumask_var_t fallback_doms;
+
+ /*
+- * arch_update_cpu_topology lets virtualized architectures update the
++ * arch_update_cpu_topology lets virtualised architectures update the
+  * cpu core maps. It is supposed to return 1 if the topology changed
+  * or 0 if it stayed the same.
+  */
+@@ -5827,6 +5884,9 @@ static int update_runtime(struct notifie
+
+ void __init sched_init_smp(void)
+ {
++      struct sched_domain *sd;
++      int cpu;
++
+       cpumask_var_t non_isolated_cpus;
+
+       alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
+@@ -5866,6 +5926,35 @@ void __init sched_init_smp(void)
+        * fashion.
+        */
+       rr_interval *= 1 + ilog2(num_online_cpus());
++
++      /*
++       * Set up the relative cache distance of each online cpu from each
++       * other in a simple array for quick lookup. Locality is determined
++       * by the closest sched_domain that CPUs are separated by. CPUs with
++       * shared cache in SMT and MC are treated as local. Separate CPUs
++       * (within the same package or physically) within the same node are
++       * treated as not local. CPUs not even in the same domain (different
++       * nodes) are treated as very distant.
++       */
++      for_each_online_cpu(cpu) {
++              for_each_domain(cpu, sd) {
++                      struct rq *rq = cpu_rq(cpu);
++                      unsigned long locality;
++                      int other_cpu;
++
++                      if (sd->level <= SD_LV_MC)
++                              locality = 0;
++                      else if (sd->level <= SD_LV_NODE)
++                              locality = 1;
++                      else
++                              continue;
++
++                      for_each_cpu_mask(other_cpu, *sched_domain_span(sd)) {
++                              if (locality < rq->cpu_locality[other_cpu])
++                                      rq->cpu_locality[other_cpu] = locality;
++                      }
++              }
++      }
+ }
+ #else
+ void __init sched_init_smp(void)
+@@ -5882,7 +5971,7 @@ int in_sched_functions(unsigned long add
+               && addr < (unsigned long)__sched_text_end);
+ }
+
+-void sched_init(void)
++void __init sched_init(void)
+ {
+       int i;
+       int highest_cpu = 0;
+@@ -5925,6 +6014,18 @@ void sched_init(void)
+
+ #ifdef CONFIG_SMP
+       nr_cpu_ids = highest_cpu + 1;
++      for_each_possible_cpu(i) {
++              struct rq *rq = cpu_rq(i);
++              int j;
++
++              rq->cpu_locality = kmalloc(nr_cpu_ids * sizeof(unsigned long), GFP_NOWAIT);
++              for_each_possible_cpu(j) {
++                      if (i == j)
++                              rq->cpu_locality[j] = 0;
++                      else
++                              rq->cpu_locality[j] = 4;
++              }
++      }
+ #endif
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -6051,7 +6152,7 @@ struct task_struct *curr_task(int cpu)
+  * Description: This function must only be used when non-maskable interrupts
+  * are serviced on a separate stack.  It allows the architecture to switch the
+  * notion of the current task on a cpu in a non-blocking manner.  This function
+- * must be called with all CPU's synchronized, and interrupts disabled, the
++ * must be called with all CPU's synchronised, and interrupts disabled, the
+  * and caller must save the original value of the current task (see
+  * curr_task() above) and restore that value before reenabling interrupts and
+  * re-starting the system.
+Index: linux-2.6.31-test/kernel/Kconfig.preempt
+===================================================================
+--- linux-2.6.31-test.orig/kernel/Kconfig.preempt      2009-10-01 12:24:56.552354234 +1000
++++ linux-2.6.31-test/kernel/Kconfig.preempt   2009-10-01 12:30:25.539335484 +1000
+@@ -1,7 +1,7 @@
+
+ choice
+       prompt "Preemption Model"
+-      default PREEMPT
++      default PREEMPT_NONE
+
+ config PREEMPT_NONE
+       bool "No Forced Preemption (Server)"
+@@ -16,6 +16,23 @@ config PREEMPT_NONE
+         raw processing power of the kernel, irrespective of scheduling
+         latencies.
+
++config PREEMPT_VOLUNTARY
++      bool "Voluntary Kernel Preemption (Desktop)"
++      help
++        This option reduces the latency of the kernel by adding more
++        "explicit preemption points" to the kernel code. These new
++        preemption points have been selected to reduce the maximum
++        latency of rescheduling, providing faster application reactions,
++        at the cost of slightly lower throughput.
++
++        This allows reaction to interactive events by allowing a
++        low priority process to voluntarily preempt itself even if it
++        is in kernel mode executing a system call. This allows
++        applications to run more 'smoothly' even when the system is
++        under load.
++
++        Select this if you are building a kernel for a desktop system.
++
+ config PREEMPT
+       bool "Preemptible Kernel (Low-Latency Desktop)"
+       help
+Index: linux-2.6.31-test/init/main.c
+===================================================================
+--- linux-2.6.31-test.orig/init/main.c 2009-09-10 11:45:38.000000000 +1000
++++ linux-2.6.31-test/init/main.c      2009-10-01 12:30:25.539335484 +1000
+@@ -843,6 +843,8 @@ static noinline int init_post(void)
+       system_state = SYSTEM_RUNNING;
+       numa_default_policy();
+
++      printk(KERN_INFO"Running BFS CPU scheduler v0.300 by Con Kolivas.\n");
++
+       if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+               printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+
+Index: linux-2.6.31-test/kernel/exit.c
+===================================================================
+--- linux-2.6.31-test.orig/kernel/exit.c       2009-10-01 12:24:56.541364845 +1000
++++ linux-2.6.31-test/kernel/exit.c    2009-10-01 12:30:25.541335390 +1000
+@@ -206,6 +206,7 @@ repeat:
+                       leader->exit_state = EXIT_DEAD;
+       }
+
++      sched_exit(p);
+       write_unlock_irq(&tasklist_lock);
+       release_thread(p);
+       call_rcu(&p->rcu, delayed_put_task_struct);
+Index: linux-2.6.31-test/include/linux/sched.h
+===================================================================
+--- linux-2.6.31-test.orig/include/linux/sched.h       2009-10-01 12:24:56.486614782 +1000
++++ linux-2.6.31-test/include/linux/sched.h    2009-10-01 12:30:25.543335645 +1000
+@@ -1795,6 +1795,7 @@ extern void wake_up_new_task(struct task
+  static inline void kick_process(struct task_struct *tsk) { }
+ #endif
+ extern void sched_fork(struct task_struct *p, int clone_flags);
++extern void sched_exit(struct task_struct *p);
<<Diff was trimmed, longer than 597 lines>>
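
For context, the functional core of this BFS 300 update is the per-CPU cpu_locality table and the cache_distance() helper, which add a locality-scaled offset to a task's virtual deadline before the preemption check in task_preempts_curr() and the earliest-deadline selection loop. The standalone C sketch below only illustrates that comparison and is not code from the patch: NR_CPUS_DEMO, TIMESLICE_US, demo_task, cache_distance_demo() and preempts() are made-up names, the matrix values are an assumed topology, and a plain "<" stands in for the kernel's wrap-safe time_before(); the locality levels (0 shared cache, 1 same node, 4 other node) and the one-timeslice-per-level offset do follow the diff above.

/*
 * Illustration only (not from the patch): a locality-scaled deadline
 * offset biases preemption toward cache-local CPUs. Locality values
 * mirror the patch defaults: 0 = shared cache, 1 = same node,
 * 4 = different node; the offset is locality * timeslice.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS_DEMO 4      /* hypothetical CPU count */
#define TIMESLICE_US 6000   /* demo timeslice; the patch uses task_timeslice(p) */

/* cpu_locality[a][b]: relative cache distance from CPU a to CPU b */
static const unsigned long cpu_locality[NR_CPUS_DEMO][NR_CPUS_DEMO] = {
	{ 0, 0, 1, 4 },
	{ 0, 0, 1, 4 },
	{ 1, 1, 0, 4 },
	{ 4, 4, 4, 0 },
};

struct demo_task {
	int cpu;                /* CPU whose runqueue the task sits on */
	unsigned long deadline; /* virtual deadline, demo units */
};

/* One extra timeslice of deadline per level of cache distance. */
static unsigned long cache_distance_demo(const struct demo_task *p, int target_cpu)
{
	return cpu_locality[target_cpu][p->cpu] * TIMESLICE_US;
}

/* Would task p preempt whatever runs on target_cpu with curr_deadline? */
static bool preempts(const struct demo_task *p, int target_cpu,
		     unsigned long curr_deadline)
{
	unsigned long p_deadline = p->deadline + cache_distance_demo(p, target_cpu);

	return p_deadline < curr_deadline;
}

int main(void)
{
	struct demo_task waker = { .cpu = 0, .deadline = 10000 };

	/* Same deadline on both targets; only cache locality differs. */
	printf("preempt CPU 1 (shared cache): %d\n", preempts(&waker, 1, 12000));
	printf("preempt CPU 3 (distant node): %d\n", preempts(&waker, 3, 12000));
	return 0;
}

With equal deadlines the waking task preempts a CPU that shares its cache (offset 0) but not one four locality levels away (offset 4 * TIMESLICE_US), which is the bias the sched_init_smp() hunk configures with the 0/1/4 locality values.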

---- CVS-web:
    http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/packages/kernel-desktop/kernel-desktop-sched-bfs.patch?r1=1.1.2.10&r2=1.1.2.11&f=u

_______________________________________________
pld-cvs-commit mailing list
pld-cvs-commit@lists.pld-linux.org
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit
