tree 1ef99fd4d7246b2afa16dc7d1514b6ff25fa8284
parent b0d62e6d5b3318b6b722121d945afa295f7201b5
author Chen, Kenneth W <[EMAIL PROTECTED]> Sat, 10 Sep 2005 03:02:02 -0700
committer Linus Torvalds <[EMAIL PROTECTED]> Sat, 10 Sep 2005 03:57:31 -0700

[PATCH] Prefetch kernel stacks to speed up context switch

For architectures like ia64, the switch stack structure is fairly large
(currently 528 bytes).  For context-switch-intensive applications, we
found that a significant number of cache misses occur in the switch_to()
function.  The following patch adds a hook in the schedule() function to
prefetch the switch stack structure as soon as the 'next' task is
determined.  This allows maximum overlap in prefetching the cache lines
for that structure.
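
A minimal sketch of the same idea in portable C (illustration only, not
part of the patch; the helper name and GCC's __builtin_prefetch are
assumptions here).  The actual ia64 code below uses lfetch.fault.excl
for the switch-stack save area, which will be written, and lfetch.fault
for next's kernel stack, which is only read:

	#include <stddef.h>

	#define CACHE_LINE_SIZE	128	/* assumed ia64 line size */

	/* Touch every cache line of [addr, addr + len) before it is used. */
	static inline void prefetch_range_write(const void *addr, size_t len)
	{
		const char *p = addr;
		const char *end = p + len;

		for (; p < end; p += CACHE_LINE_SIZE)
			__builtin_prefetch(p, 1, 3);	/* 1 = prefetch for write */
	}

With a 528-byte switch stack, prefetch_range_write(save_area, 528) would
touch all five 128-byte lines, matching the five lfetch.fault.excl
instructions issued below.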

Signed-off-by: Ken Chen <[EMAIL PROTECTED]>
Cc: Ingo Molnar <[EMAIL PROTECTED]>
Cc: "Luck, Tony" <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>

 arch/ia64/kernel/entry.S  |   23 +++++++++++++++++++++++
 include/asm-ia64/system.h |    1 +
 include/linux/sched.h     |    5 +++++
 kernel/sched.c            |    1 +
 4 files changed, 30 insertions(+)

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -470,6 +470,29 @@ ENTRY(load_switch_stack)
        br.cond.sptk.many b7
 END(load_switch_stack)
 
+GLOBAL_ENTRY(prefetch_stack)
+       add r14 = -IA64_SWITCH_STACK_SIZE, sp   // switch-stack save area
+       add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 // &next->thread.ksp
+       ;;
+       ld8 r16 = [r15]                         // load next's stack pointer
+       lfetch.fault.excl [r14], 128            // prefetch for write
+       ;;
+       lfetch.fault.excl [r14], 128
+       lfetch.fault [r16], 128                 // prefetch for read
+       ;;
+       lfetch.fault.excl [r14], 128
+       lfetch.fault [r16], 128
+       ;;
+       lfetch.fault.excl [r14], 128
+       lfetch.fault [r16], 128
+       ;;
+       lfetch.fault.excl [r14], 128
+       lfetch.fault [r16], 128
+       ;;
+       lfetch.fault [r16], 128
+       br.ret.sptk.many rp
+END(prefetch_stack)
+
 GLOBAL_ENTRY(execve)
        mov r15=__NR_execve                     // put syscall number in place
        break __BREAK_SYSCALL
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -275,6 +275,7 @@ extern void ia64_load_extra (struct task
  */
 #define __ARCH_WANT_UNLOCKED_CTXSW
 
+#define ARCH_HAS_PREFETCH_SWITCH_STACK
 #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
 
 void cpu_idle_wait(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,11 @@ extern int groups_search(struct group_in
 #define GROUP_AT(gi, i) \
     ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
 
+#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
+extern void prefetch_stack(struct task_struct *t);
+#else
+static inline void prefetch_stack(struct task_struct *t) { }
+#endif
 
 struct audit_context;          /* See audit.c */
 struct mempolicy;
diff --git a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2888,6 +2888,7 @@ switch_tasks:
        if (next == rq->idle)
                schedstat_inc(rq, sched_goidle);
        prefetch(next);
+       prefetch_stack(next);
        clear_tsk_need_resched(prev);
        rcu_qsctr_inc(task_cpu(prev));
 