Module: xenomai-forge
Branch: master
Commit: eb49e00dd0997728e8dd03984e3d819ba76c556a
URL:    
http://git.xenomai.org/?p=xenomai-forge.git;a=commit;h=eb49e00dd0997728e8dd03984e3d819ba76c556a

Author: Gilles Chanteperdrix <gilles.chanteperd...@xenomai.org>
Date:   Fri Oct 25 21:53:56 2013 +0200

cobalt/x86: lazy FPU switching

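Set XNFPU on a thread only upon its first FPU use, i.e. from the FPU
fault handler, instead of at shadow TCB initialization.
On CPUs with fxsr, skip the FPU context restore when the outgoing and
incoming threads share the same FPU context.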

---

 kernel/cobalt/arch/x86/thread.c |  112 ++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 42 deletions(-)

diff --git a/kernel/cobalt/arch/x86/thread.c b/kernel/cobalt/arch/x86/thread.c
index 646d0aa..aa6dabf 100644
--- a/kernel/cobalt/arch/x86/thread.c
+++ b/kernel/cobalt/arch/x86/thread.c
@@ -230,43 +230,61 @@ int xnarch_handle_fpu_fault(struct xnthread *from,
        if (__thread_has_fpu(p))
                return 0;
 
-       /*
-        * The faulting task is a shadow using the FPU for the first
-        * time, initialize the FPU context and tell linux about it.
-        * The fpu usage bit is necessary for xnarch_save_fpu() to
-        * save the FPU state at next switch.
-        */
-       __asm__ __volatile__("clts; fninit");
+       if (tsk_used_math(p) == 0) {
+               /*
+                * The faulting task is a shadow using the FPU for the first
+                * time, initialize the FPU context and tell linux about it.
+                * The fpu usage bit is necessary for xnarch_save_fpu() to
+                * save the FPU state at next switch.
+                */
+               __asm__ __volatile__("clts; fninit");
 
-       if (cpu_has_xmm) {
-               unsigned long __mxcsr = 0x1f80UL & 0xffbfUL;
-               __asm__ __volatile__("ldmxcsr %0"::"m"(__mxcsr));
+               if (cpu_has_xmm) {
+                       unsigned long __mxcsr = 0x1f80UL & 0xffbfUL;
+                       __asm__ __volatile__("ldmxcsr %0"::"m"(__mxcsr));
+               }
+               set_stopped_child_used_math(p);
+       } else {
+               /*
+                * The faulting task already used FPU in secondary
+                * mode.
+                */
+               clts();
+               __do_restore_i387(tcb->fpup);
        }
-
-       set_stopped_child_used_math(p);
+               
        __thread_set_has_fpu(p);
+       xnthread_set_state(to, XNFPU);
 
        return 1;
 }
 
+#define current_task_used_kfpu(p) \
+       (__thread_has_fpu(p) == 0 && (read_cr0() & X86_CR0_TS) == 0)
+#define tcb_used_kfpu(t) ((t)->root_kfpu)
+
 void xnarch_leave_root(struct xnthread *root)
 {
-       struct xnarchtcb *rootcb = xnthread_archtcb(root);
-       struct task_struct *p = current;
+       struct xnarchtcb *const rootcb = xnthread_archtcb(root);
+       struct task_struct *const p = current;
+       x86_fpustate *const current_task_fpup = x86_fpustate_ptr(&p->thread);
 
 #ifdef CONFIG_X86_64
        rootcb->spp = &p->thread.sp;
        rootcb->ipp = &p->thread.rip;
 #endif
-       rootcb->fpup = x86_fpustate_ptr(&p->thread);
-       rootcb->root_kfpu = 
-               (read_cr0() & 8) == 0 && __thread_has_fpu(p) == 0;
-       if (rootcb->root_kfpu) {
-               rootcb->root_used_math = tsk_used_math(p) != 0;
-               x86_fpustate_ptr(&p->thread) = &rootcb->i387;
-               __thread_set_has_fpu(p);
-               set_stopped_child_used_math(p);
+       if (current_task_used_kfpu(p) == 0) {
+               rootcb->root_kfpu = 0;
+               rootcb->fpup = __thread_has_fpu(p) ? current_task_fpup : NULL;
+               return;
        }
+
+       rootcb->root_kfpu = 1;
+       rootcb->fpup = current_task_fpup;
+       rootcb->root_used_math = tsk_used_math(p) != 0;
+       x86_fpustate_ptr(&p->thread) = &rootcb->i387;
+       __thread_set_has_fpu(p);
+       set_stopped_child_used_math(p);
 }
 
 void xnarch_save_fpu(struct xnthread *thread)
@@ -275,40 +293,47 @@ void xnarch_save_fpu(struct xnthread *thread)
        struct task_struct *p = tcb->core.host_task;
 
        if (__thread_has_fpu(p) == 0)
-               /* Common case: already saved by __switch_to */
+               /* Saved by last __switch_to */
                return;
        
-       /* Exceptional case: a migrating thread */
        clts();
 
        __do_save_i387(x86_fpustate_ptr(&p->thread));
        __thread_clear_has_fpu(p);
 }
 
-void xnarch_switch_fpu(struct xnthread *from, struct xnthread *thread)
+void xnarch_switch_fpu(struct xnthread *from, struct xnthread *to)
 {
-       struct xnarchtcb *tcb = xnthread_archtcb(thread);
-       struct task_struct *p = tcb->core.host_task;
-
-       if (tcb->root_kfpu == 0 && 
-               (tsk_used_math(p) == 0 || xnthread_test_state(thread, XNROOT)))
-               /* Restore lazy mode */
+       x86_fpustate *const from_fpup = from ? from->tcb.fpup : NULL;
+       struct xnarchtcb *const tcb = xnthread_archtcb(to);
+       struct task_struct *const p = tcb->core.host_task;
+       x86_fpustate *const current_task_fpup = x86_fpustate_ptr(&p->thread);
+
+       if (xnthread_test_state(to, XNROOT) && from_fpup != current_task_fpup &&
+               tcb_used_kfpu(tcb) == 0)
+               /* Only restore lazy mode if root fpu owner is not current */
                return;
 
+       clts();
        /*
-        * Restore the FPU hardware with valid fp registers from a
-        * RT user-space or kernel thread.
+        * The only case where we can skip restoring the FPU is:
+        * - the fpu context of the current task is the current fpu
+        * context;
+        * - root thread has not used fpu in kernel-space;
+        * - cpu has fxsr (because if it does not, last context switch
+        * reinitialized fpu)
         */
-       clts();
-
-       __do_restore_i387(x86_fpustate_ptr(&p->thread));
-       if (tcb->root_kfpu) {
-               x86_fpustate_ptr(&p->thread) = tcb->fpup;
-               __thread_clear_has_fpu(p);
-               if (tcb->root_used_math == 0)
-                       clear_stopped_child_used_math(p);
-       } else
+       if (from_fpup != current_task_fpup || cpu_has_fxsr == 0)
+               __do_restore_i387(current_task_fpup);
+       if (tcb_used_kfpu(tcb) == 0) {
                __thread_set_has_fpu(p);
+               return;
+       }
+
+       x86_fpustate_ptr(&p->thread) = to->tcb.fpup;
+       __thread_clear_has_fpu(p);
+       if (tcb->root_used_math == 0)
+               clear_stopped_child_used_math(p);
 }
 
 #endif /* CONFIG_XENO_HW_FPU */
@@ -337,6 +362,9 @@ void xnarch_init_shadow_tcb(struct xnthread *thread)
 #endif
        tcb->fpup = x86_fpustate_ptr(&p->thread);
        tcb->root_kfpu = 0;
+
+       /* XNFPU is set upon first FPU fault */
+       xnthread_clear_state(thread, XNFPU);
 }
 
 int xnarch_escalate(void)


_______________________________________________
Xenomai-git mailing list
Xenomai-git@xenomai.org
http://www.xenomai.org/mailman/listinfo/xenomai-git

Reply via email to