Module: xenomai-gch
Branch: fpu-tracer
Commit: 5fb185858c95c2f6cd45565efdecd72a28a2152f
URL:    
http://git.xenomai.org/?p=xenomai-gch.git;a=commit;h=5fb185858c95c2f6cd45565efdecd72a28a2152f

Author: Gilles Chanteperdrix <gilles.chanteperd...@xenomai.org>
Date:   Sat Apr 11 20:32:11 2009 +0200

FPU tracer.

Working on x86_32 and x86_64.

---

 include/asm-generic/fp_dump.h             |  149 ++++++++
 include/asm-powerpc/fp_dump.h             |  126 +++++++
 include/asm-x86/bits/pod_32.h             |   49 ++-
 include/asm-x86/bits/pod_64.h             |   50 +++-
 include/asm-x86/fp_dump.h                 |  175 +++++++++
 include/nucleus/fpu_trace.h               |   49 +++
 ksrc/arch/x86/patches/fp_tracepoints.diff |  107 ++++++
 ksrc/nucleus/Makefile                     |    4 +-
 ksrc/nucleus/fpu_trace.c                  |  566 +++++++++++++++++++++++++++++
 ksrc/nucleus/pod.c                        |    2 +
 ksrc/nucleus/shadow.c                     |    4 +
 src/testsuite/switchtest/switchtest.c     |    2 +
 12 files changed, 1265 insertions(+), 18 deletions(-)

diff --git a/include/asm-generic/fp_dump.h b/include/asm-generic/fp_dump.h
new file mode 100644
index 0000000..fe83ff0
--- /dev/null
+++ b/include/asm-generic/fp_dump.h
@@ -0,0 +1,149 @@
+#ifndef FP_DUMP_INNER_H
+#define FP_DUMP_INNER_H
+
+#include <linux/kernel.h>      /* For printk */
+#include <linux/bitops.h>      /* For fls */
+#include <asm/div64.h>         /* For do_div */
+
+#ifndef fp_print
+#define fp_print(cookie, fmt, args...) printk(fmt , ##args)
+#endif /* fp_print */
+
+/* Float in course of conversion from base 2 to base 10.
+   f == significand * 2 ^ (exp - 63) * 10 ^ pow10. */
+typedef struct fp_conv {
+       unsigned long long significand;
+       int pow10;
+       int exp;
+} fp_conv_t;
+
+static inline unsigned __attribute__((const)) fp_sq(unsigned x)
+{
+       return x * x;
+}
+
+static inline unsigned __attribute__((const)) fp_pow3(unsigned x)
+{
+       return x * fp_sq(x);
+}
+
+static inline unsigned __attribute__((const)) fp_pow6(unsigned x)
+{
+       return fp_sq(fp_pow3(x));
+}
+
+static inline unsigned __attribute__((const)) fp_pow9(unsigned x)
+{
+       return fp_pow3(fp_pow3(x));
+}
+
+#define fp_flnzul(word) (fls(word) - 1)
+
+static inline unsigned long fp_flnzull(unsigned long long ull)
+{
+       unsigned long h = (ull >> 32);
+       if(h)
+               return 32 + fp_flnzul(h);
+       return fp_flnzul((unsigned long) ull);
+}
+
+static inline unsigned long upow(unsigned long x, unsigned long n)
+{
+       switch(n) {
+        case 1: return x;
+        case 2: return fp_sq(x);
+        case 3: return fp_pow3(x);
+        case 6: return fp_pow6(x);
+        case 9: return fp_pow9(x);
+       }
+
+       return -1;
+}
+
+/* Divide significand by a power of two and multiply by a power of 10. */
+static inline void shiftmul(fp_conv_t *fp, unsigned shift, unsigned pow10)
+{
+       /* Avoid overflow. */
+       if((fp->significand & (1ULL << 63))) {
+               fp->significand >>= 1;
+               fp->exp++;
+       }
+
+       fp->significand >>= shift - pow10 -1;
+       fp->significand *= upow(5, pow10);
+       fp->exp += shift - 1;
+       fp->pow10 -= pow10;
+}
+
+/* Divide significand by a power of ten and multiply by a power of 2. */
+static inline void divshift(fp_conv_t *fp, unsigned pow10, unsigned shift)
+{
+       do_div(fp->significand, upow(5, pow10));
+       fp->significand <<= shift - pow10 -1;
+       fp->exp -= shift - 1;
+       fp->pow10 += pow10;
+
+       /* Maintain accuracy. */
+       if(!(fp->significand & (1ULL << 63))) {
+               fp->significand <<= 1;
+               fp->exp--;
+        }
+}
+
+static inline void fp_disp(void *cookie, fp_conv_t *fp, int neg)
+{
+       /* Normalize. */
+       if(fp->significand && !(fp->significand & (1ULL << 63))) {
+               unsigned shift = 63 - fp_flnzull(fp->significand);
+               fp->significand <<= shift;
+               fp->exp -= shift;
+        }
+
+       /* Convert power of 2 exponent to power of 10,
+          bring exponent between 0 and 63 for integers,
+          between 0 and 29 for non integers. */
+       while(fp->exp <= 0)
+               shiftmul(fp, 30, 9);
+       while(fp->exp > 63 || (fp->pow10 && fp->exp >= 29))
+               divshift(fp, 9, 30);
+
+       /* For non-integer, bring integer part between 0 and 10. */
+       if(fp->pow10) {
+               if(fp->exp >= 19)
+                       divshift(fp, 6, 20);
+               if(fp->exp >= 9)
+                       divshift(fp, 3, 10);
+               if(fp->exp >= 6)
+                       divshift(fp, 2, 7);
+               if(fp->exp > 3)
+                       divshift(fp, 1, 4);
+        }
+
+       {
+               unsigned long long tmp, i;
+               unsigned long f, rem;
+               /* Integer part. */
+               i = fp->significand >> (63 - fp->exp);
+
+               /* Fractional part, left-aligned. */
+               tmp = (fp->significand << (fp->exp + 1));
+
+               /* Convert to base 10, to 9 places. */
+               tmp >>= 21;
+               tmp *= fp_pow9(5);
+
+               /* Round to 32 bits. */
+               f = tmp >> 34;
+               rem = (tmp >> 2) & 0xffffffffUL; /* low 32 rounding bits */
+               if(rem > (1U << 31) ||
+                  (rem == (1U << 31) && (f & 1)) /* Round to even. */)
+                       ++f;
+
+               /* Display. */
+               fp_print(cookie, "%c%llu.%09lu", neg ? '-' : ' ', i, f);
+               if(fp->pow10)
+                       fp_print(cookie, "E%+d", fp->pow10);
+       }
+}
+
+#endif /* FP_DUMP_INNER_H */
diff --git a/include/asm-powerpc/fp_dump.h b/include/asm-powerpc/fp_dump.h
new file mode 100644
index 0000000..1c15389
--- /dev/null
+++ b/include/asm-powerpc/fp_dump.h
@@ -0,0 +1,126 @@
+#ifndef FP_DUMP_PPC_H
+#define FP_DUMP_PPC_H
+
+#define flnzul(word)                                            \
+({                                                              \
+    unsigned long out;                                          \
+    /* Derived from bitops.h's ffs() */                         \
+    __asm__ ("cntlzw %0, %1" : "=r" (out) : "r" (word));        \
+    31 - out;                                                   \
+})
+
+#if defined(__KERNEL__) && BITS_PER_LONG == 32
+#include <asm/div64.h>
+
+#define ulldiv(ull, uld) __div64_32(&ull, uld)
+#else /* !__KERNEL__ || BITS_PER_LONG == 64 */
+#define ulldiv(ull, uld)               \
+({                                     \
+    unsigned long _r = (ull) % (uld);  \
+    ull /= uld;                        \
+    _r;                                \
+})
+#endif /* __KERNEL__ */
+
+static inline unsigned long flnzull(unsigned long long ull)
+{
+    unsigned long h = (ull >> 32);
+    if(h)
+        return 32 + flnzul(h);
+    return flnzul((unsigned long) ull);
+}
+
+#include <fp_dump_inner.h>
+
+typedef struct fpreg {
+    unsigned long fractionh;
+    unsigned long fractionl : 20;
+    unsigned short exp : 11;    /* signed, but not using 2's complement. */
+    unsigned short sign : 1;
+} fpreg_t;
+
+typedef struct fpenv {
+
+    /* This layout must follow exactly the definition of the FPU
+       backup area in a PPC thread struct available from
+       <asm-ppc/processor.h>. Specifically, fpr[] and fpscr words must
+       be contiguous in memory (see arch/ppc/hal/fpu.S). */
+
+    fpreg_t fpr[32];
+    unsigned long fpscr_pad;   /* <= Hi-word of the FPR used to */
+    unsigned long fpscr;       /* retrieve the FPSCR. */
+
+} fpenv_t;
+
+void rthal_save_fpu(fpenv_t *fpuenv);
+
+void rthal_restore_fpu(fpenv_t *fpuenv);
+
+#define fpenv_save rthal_save_fpu
+
+static inline void fpenv_dump(fpenv_t *fpenv)
+{
+    unsigned i;
+
+    fp_print("FPU state:\n");
+    fp_print("fpscr: 0x%08lx\n", fpenv->fpscr);
+    for(i = 0; i < 32; i++)
+        {
+        fpreg_t *reg = &fpenv->fpr[i];
+        fp_conv_t fp = {
+            significand:  ( (1ULL << 63)
+                            + (((unsigned long long) reg->fractionh) << 31)
+                            + (((unsigned long long) reg->fractionl) << 11)),
+            exp: (int) (reg->exp - 1023) - 11,
+            pow10: 0,
+        };
+
+        fp_print("R%2u: ", i);
+
+        switch(reg->exp)
+            {
+            case 0:
+                /* zero or denormalized. */
+                if(!reg->fractionh && !reg->fractionl)
+                    {
+                    fp_print("0");
+                    break;
+                    }
+
+                fp_print("(DEN) ");
+                fp.significand <<= 1;
+                fp.exp--;
+                /* Fall through normalized. */
+
+            default:
+                /* normalized. */
+                disp(&fp, reg->sign);
+                break;
+
+            case 2047:
+                /* infinities or nans. */
+                if(!reg->fractionh && !reg->fractionl)
+                    fp_print("%c inf.", reg->sign ? '-' : '+');
+                else
+                    fp_print("%cnan.", ( reg->fractionh & (1UL << 31)
+                                         ? 'Q' : 'S' ));
+                break;
+            }
+        fp_print("    %c", i % 2 ? '\n' : ' ');
+        }
+}
+
+static inline int fpenv_cmp(fpenv_t *env1p, fpenv_t *env2p)
+{
+    unsigned i;
+
+    /* Only compare the values of the 32 registers; do not take status/control
+       registers value into account. */
+    for(i = 0; i < 32; i++)
+        if(memcmp(&env1p->fpr[i], &env2p->fpr[i], sizeof(fpreg_t)))
+            return 1;
+    return 0;
+}
+
+
+#endif /* FP_DUMP_PPC_H */
diff --git a/include/asm-x86/bits/pod_32.h b/include/asm-x86/bits/pod_32.h
index 6e79382..5778295 100644
--- a/include/asm-x86/bits/pod_32.h
+++ b/include/asm-x86/bits/pod_32.h
@@ -23,6 +23,7 @@
 #define _XENO_ASM_X86_BITS_POD_32_H
 #define _XENO_ASM_X86_BITS_POD_H
 
+#include <nucleus/fpu_trace.h>
 #include <asm-generic/xenomai/bits/pod.h>
 #include <asm/xenomai/switch.h>
 
@@ -167,6 +168,7 @@ static inline void xnarch_init_thread(xnarchtcb_t * tcb,
 static inline void xnarch_init_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
        /* Initialize the FPU for a task. This must be run on behalf of the
           task. */
 
@@ -184,33 +186,45 @@ static inline void xnarch_init_fpu(xnarchtcb_t * tcb)
                   switch. */
                xnarch_set_fpu_init(task);
                wrap_set_fpu_used(task);
-       }
+               branch = 1;
+       } else
+               branch = 2;
+       fp_trace_xeno_init(branch);
 }
 
 static inline void xnarch_save_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
 
        if (!tcb->is_root) {
                if (task) {
                        /* fpu not used or already saved by __switch_to. */
-                       if (!wrap_test_fpu_used(task))
+                       if (!wrap_test_fpu_used(task)) {
+                               fp_trace_xeno_save(1, 0);
                                return;
+                       }
 
                        /* Tell Linux that we already saved the state of the FPU
                           hardware of this task. */
                        wrap_clear_fpu_used(task);
-               }
+                       branch = 2;
+               } else
+                       branch = 3;
        } else {
                if (tcb->cr0_ts ||
-                   (tcb->ts_usedfpu && !wrap_test_fpu_used(task)))
+                   (tcb->ts_usedfpu && !wrap_test_fpu_used(task))) {
+                       fp_trace_xeno_save(4, 0);
                        return;
+               }
 
                wrap_clear_fpu_used(task);
+               branch = 5;
        }
 
        clts();
 
+       fp_trace_xeno_save(branch, 1);
        if (cpu_has_fxsr)
                __asm__ __volatile__("fxsave %0; fnclex":"=m"(*tcb->fpup));
        else
@@ -220,10 +234,12 @@ static inline void xnarch_save_fpu(xnarchtcb_t * tcb)
 static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
 
        if (!tcb->is_root) {
                if (task) {
                        if (!xnarch_fpu_init_p(task)) {
+                               fp_trace_xeno_restore(1, 0);
                                stts();
                                return; /* Uninit fpu area -- do not restore. */
                        }
@@ -231,10 +247,13 @@ static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
                        /* Tell Linux that this task has altered the state of
                         * the FPU hardware. */
                        wrap_set_fpu_used(task);
-               }
+                       branch = 2;
+               } else
+                       branch = 3;
        } else {
                /* Restore state of FPU only if TS bit in cr0 was clear. */
                if (tcb->cr0_ts) {
+                       fp_trace_xeno_restore(4, 0);
                        stts();
                        return;
                }
@@ -243,9 +262,12 @@ static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
                        /* __switch_to saved the fpu context, no need to restore
                           it since we are switching to root, where fpu can be
                           in lazy state. */
+                       fp_trace_xeno_restore(5, 0);
                        stts();
                        return;
                }
+
+               branch = 6;
        }
 
        /* Restore the FPU hardware with valid fp registers from a
@@ -258,16 +280,20 @@ static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
        else
                __asm__ __volatile__("frstor %0": /* no output */ :"m"(*tcb->
                                                                       fpup));
+       fp_trace_xeno_restore(branch, 1);
 }
 
 static inline void xnarch_enable_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
 
        if (!tcb->is_root) {
                if (task) {
-                       if (!xnarch_fpu_init_p(task))
+                       if (!xnarch_fpu_init_p(task)) {
+                               fp_trace_xeno_enable(1, 0);
                                return;
+                       }
 
                        /* We used to test here if __switch_to had not saved
                           current fpu state, but this can not happen, since
@@ -275,15 +301,22 @@ static inline void xnarch_enable_fpu(xnarchtcb_t * tcb)
                           back to a user-space task after one or several
                           switches to non-fpu kernel-space real-time tasks, so
                           xnarch_switch_to never uses __switch_to. */
-               }
+                       branch = 2;
+               } else
+                       branch = 3;
        } else {
-               if (tcb->cr0_ts)
+               if (tcb->cr0_ts) {
+                       fp_trace_xeno_enable(4, 0);
                        return;
+               }
 
                /* The comment in the non-root case applies here too. */
+               branch = 5;
        }
 
        clts();
+
+       fp_trace_xeno_enable(branch, 1);
 }
 
 #else /* !CONFIG_XENO_HW_FPU */
diff --git a/include/asm-x86/bits/pod_64.h b/include/asm-x86/bits/pod_64.h
index 3eea0f5..d349343 100644
--- a/include/asm-x86/bits/pod_64.h
+++ b/include/asm-x86/bits/pod_64.h
@@ -145,6 +145,7 @@ static inline void xnarch_init_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
        unsigned long __mxcsr;
+       unsigned branch;
        /* Initialize the FPU for a task. This must be run on behalf of the
           task. */
 
@@ -159,7 +160,10 @@ static inline void xnarch_init_fpu(xnarchtcb_t * tcb)
                   save the FPU state at next switch. */
                xnarch_set_fpu_init(task);
                task_thread_info(task)->status |= TS_USEDFPU;
-       }
+               branch = 1;
+       } else
+               branch = 2;
+       fp_trace_xeno_init(branch);
 }
 
 static inline int __save_i387_checking(struct i387_fxsave_struct __user *fx)
@@ -185,27 +189,36 @@ static inline int __save_i387_checking(struct 
i387_fxsave_struct __user *fx)
 static inline void xnarch_save_fpu(xnarchtcb_t *tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
 
        if (!tcb->is_root) {
                if (task) {
                        /* fpu not used or already saved by __switch_to. */
-                       if (!(task_thread_info(task)->status & TS_USEDFPU))
+                       if (!(task_thread_info(task)->status & TS_USEDFPU)) {
+                               fp_trace_xeno_save(1, 0);
                                return;
+                       }
 
                        /* Tell Linux that we already saved the state
                         * of the FPU hardware of this task. */
                        task_thread_info(task)->status &= ~TS_USEDFPU;
-               }
+                       branch = 2;
+               } else
+                       branch = 3;
        } else {
                if (tcb->cr0_ts ||
-                   (tcb->ts_usedfpu && !(task_thread_info(task)->status & 
TS_USEDFPU)))
+                   (tcb->ts_usedfpu && !(task_thread_info(task)->status & 
TS_USEDFPU))) {
+                       fp_trace_xeno_save(4, 0);
                        return;
+               }
 
                task_thread_info(task)->status &= ~TS_USEDFPU;
+               branch = 5;
        }
 
        clts();
 
+       fp_trace_xeno_save(branch, 1);
        __save_i387_checking(&tcb->fpup->fxsave);
 }
 
@@ -232,10 +245,12 @@ static inline int __restore_i387_checking(struct 
i387_fxsave_struct *fx)
 static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
 
        if (!tcb->is_root) {
                if (task) {
                        if (!xnarch_fpu_init_p(task)) {
+                               fp_trace_xeno_restore(1, 0);
                                stts();
                                return; /* Uninit fpu area -- do not restore. */
                        }
@@ -243,10 +258,13 @@ static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
                        /* Tell Linux that this task has altered the state of
                         * the FPU hardware. */
                        task_thread_info(task)->status |= TS_USEDFPU;
-               }
+                       branch = 2;
+               } else
+                       branch = 3;
        } else {
                /* Restore state of FPU only if TS bit in cr0 was clear. */
                if (tcb->cr0_ts) {
+                       fp_trace_xeno_restore(4, 0);
                        stts();
                        return;
                }
@@ -256,9 +274,12 @@ static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
                        /* __switch_to saved the fpu context, no need to restore
                           it since we are switching to root, where fpu can be
                           in lazy state. */
+                       fp_trace_xeno_restore(5, 0);
                        stts();
                        return;
                }
+
+               branch = 6;
        }
 
        /* Restore the FPU hardware with valid fp registers from a
@@ -266,16 +287,21 @@ static inline void xnarch_restore_fpu(xnarchtcb_t * tcb)
        clts();
 
        __restore_i387_checking(&tcb->fpup->fxsave);
+
+       fp_trace_xeno_restore(branch, 1);
 }
 
 static inline void xnarch_enable_fpu(xnarchtcb_t * tcb)
 {
        struct task_struct *task = tcb->user_task;
+       unsigned branch;
 
        if (!tcb->is_root) {
                if (task) {
-                       if (!xnarch_fpu_init_p(task))
+                       if (!xnarch_fpu_init_p(task)) {
+                               fp_trace_xeno_enable(1, 0);
                                return;
+                       }
 
                        /* We used to test here if __switch_to had not saved
                           current fpu state, but this can not happen, since
@@ -283,15 +309,23 @@ static inline void xnarch_enable_fpu(xnarchtcb_t * tcb)
                           back to a user-space task after one or several
                           switches to non-fpu kernel-space real-time tasks, so
                           xnarch_switch_to never uses __switch_to. */
-               }
+
+                       branch = 2;
+               } else
+                       branch = 3;
        } else {
-               if (tcb->cr0_ts)
+               if (tcb->cr0_ts) {
+                       fp_trace_xeno_enable(4, 0);
                        return;
+               }
 
                /* The comment in the non-root case applies here too. */
+               branch = 5;
        }
 
        clts();
+
+       fp_trace_xeno_enable(branch, 1);
 }
 
 #else /* !CONFIG_XENO_HW_FPU */
diff --git a/include/asm-x86/fp_dump.h b/include/asm-x86/fp_dump.h
new file mode 100644
index 0000000..50e9a8b
--- /dev/null
+++ b/include/asm-x86/fp_dump.h
@@ -0,0 +1,175 @@
+#ifndef FP_DUMP_X86_H
+#define FP_DUMP_X86_H
+
+#include <asm/processor.h>     /* For struct i387 */
+#include <asm-generic/xenomai/fp_dump.h> /* For fp_disp */
+
+typedef union {
+       struct i387_fsave_struct        fsave;
+       struct i387_fxsave_struct       fxsave;
+} fpenv_t;
+
+#define ALIGN_FPENV(ptr) \
+       (fpenv_t *) (((unsigned long) (ptr) + 15) & ~15)
+
+#define DECLARE_FPENV(var)                                             \
+       unsigned char var ## _buf [sizeof(fpenv_t) + 15];               \
+       fpenv_t *var = ALIGN_FPENV(&var ## _buf[0])
+
+/* Layout of hardware floats (extended). */
+typedef struct fpreg {
+       unsigned long long significand;
+       unsigned short exp : 15;    /* signed, but not using 2's complement. */
+       unsigned short sign : 1;
+} __attribute__((packed)) fpreg_t;
+
+static inline void fp_fsave (fpenv_t *fpenvp)
+
+{
+       struct i387_fsave_struct *fpenv =
+               (struct i387_fsave_struct *) ALIGN_FPENV(fpenvp);
+       __asm__ __volatile__ ("fsave %0; fwait" : "=m" (*fpenv));
+       __asm__ __volatile__ ("frstor %0; fwait":
+                             /* no output */ : "m" (*fpenv));
+}
+
+static inline void fp_fsave_dump(void *cookie, fpenv_t *fpenvp)
+{
+       struct i387_fsave_struct *fpenv =
+               (struct i387_fsave_struct *) ALIGN_FPENV(fpenvp);
+       unsigned top, i;
+
+       top = (fpenv->swd >> 11) & 0x7;
+
+       fp_print(cookie, "FPU state:\n");
+       fp_print(cookie, "cwd: %08x,  swd: %08x\n", fpenv->cwd, fpenv->swd);
+       fp_print(cookie, "twd: %08x,  fip: %08x\n", fpenv->twd, fpenv->fip);
+       fp_print(cookie, "fcs: %08x,  foo: %08x\n", fpenv->fcs, fpenv->foo);
+       fp_print(cookie, "fos: %08x\n", fpenv->fos);
+       fp_print(cookie, "registers stack, top at %u:\n", top);
+
+       for(i = 7; i <= 7; i--) {
+               char *stack = (char *) &fpenv->st_space;
+               fpreg_t *reg = (fpreg_t *) (stack + 10 * ((i - top) % 8));
+               unsigned tag = (fpenv->twd >> (2 * i)) & 0x3;
+               fp_conv_t fp = {
+                       .significand = reg->significand,
+                       .exp = (int) (reg->exp - 0x3FFF),
+                       .pow10 = 0,
+               };
+               fp_print(cookie, "%u: ", i);
+               switch(tag) {
+               case 3: /* Empty */
+                       fp_print(cookie, "Empty\n");
+                       break;
+               case 1: /* Zero */
+                       fp_print(cookie, "0\n");
+                       break;
+               case 2: /* Special */
+                       if (reg->exp != 0x7fff) {
+                               fp_print(cookie, "denormal; ");
+                               goto valid;
+                       }
+
+                       if(!reg->significand)
+                               fp_print(cookie, "%c inf.\n",
+                                        reg->sign ? '-' : '+');
+                       else
+                               fp_print(cookie, "%cnan.\n",
+                                        (reg->significand & (1ULL << 63) ? 'Q' 
: 'S'));
+                       break;
+               case 0: /* Valid */
+               valid:
+                       fp_disp(cookie, &fp, reg->sign);
+                       fp_print(cookie, "\n");
+               }
+       }
+       fp_print(cookie, "\n");
+}
+
+static inline void fp_fxsave (fpenv_t *fpenvp)
+
+{
+       struct i387_fxsave_struct *fpenv =
+               (struct i387_fxsave_struct *) ALIGN_FPENV(fpenvp);
+
+       __asm__ __volatile__ ("fxsave %0; fwait" : "=m" (*fpenv));
+}
+
+static inline void fp_fxsave_dump(void *cookie, fpenv_t *fpenvp)
+{
+       struct i387_fxsave_struct *fpenv =
+               (struct i387_fxsave_struct *) ALIGN_FPENV(fpenvp);
+       unsigned top, i;
+
+       top = (fpenv->swd >> 11) & 0x7;
+
+       fp_print(cookie, "FPU state:\n");
+       fp_print(cookie, "  cwd: %08x,  swd: %08x\n", fpenv->cwd, fpenv->swd);
+       fp_print(cookie, "  twd: %08x,  fop: %08x\n", fpenv->twd, fpenv->fop);
+       fp_print(cookie, "  fip: %08x,  fcs: %08x\n", fpenv->fip, fpenv->fcs);
+       fp_print(cookie, "  foo: %08x,  fos: %08x\n", fpenv->foo, fpenv->fos);
+       fp_print(cookie, "mxcsr: %08x\n", fpenv->mxcsr);
+       fp_print(cookie, "registers stack, top at %u:\n", top);
+
+       for(i = 7; i <= 7; i--) {
+               char *stack = (char *) &fpenv->st_space;
+               fpreg_t *reg = (fpreg_t *) (stack + 16 * ((i - top) % 8));
+               unsigned tag = !!(fpenv->twd & (1 << i));
+               fp_conv_t fp = {
+                       .significand = reg->significand,
+                       .exp = (int) (reg->exp - 0x3FFF),
+                       .pow10 = 0,
+               };
+               fp_print(cookie, "%u: ", i);
+               switch(tag) {            /* Abridged tag. */
+               case 0: /* Empty */
+                       fp_print(cookie, "Empty\n");
+                       break;
+               case 1: /* Not empty. */
+                       switch(reg->exp) {
+                       case 0x7fff:
+                               /* Special. */
+                               if(!reg->significand)
+                                       fp_print(cookie, "%c inf.\n",
+                                                reg->sign ? '-' : '+');
+                               else
+                                       fp_print(cookie, "%cnan.\n",
+                                                (reg->significand & (1ULL << 
63) ? 'Q' : 'S'));
+                               break;
+                       case 0:
+                               /* Zero. */
+                               if(!reg->significand) {
+                                       fp_print(cookie, "0\n");
+                                       break;
+                               }
+                               /* Fall through wanted. */
+                       default:
+                               /* Valid or denormal.*/
+                               if(!(reg->significand & (1ULL << 63)))
+                                       fp_print(cookie, "denormal: ");
+                               fp_disp(cookie, &fp, reg->sign);
+                               fp_print(cookie, "\n");
+                       }
+               }
+       }
+       fp_print(cookie, "\n");
+}
+
+static inline void fpenv_save(fpenv_t *env)
+{
+       if(cpu_has_fxsr)
+               fp_fxsave(env);
+       else
+               fp_fsave(env);
+}
+
+static inline void fpenv_dump(void *cookie, fpenv_t *env)
+{
+       if(cpu_has_fxsr)
+               fp_fxsave_dump(cookie, env);
+       else
+               fp_fsave_dump(cookie, env);
+}
+
+#endif /* FP_DUMP_X86_H */
diff --git a/include/nucleus/fpu_trace.h b/include/nucleus/fpu_trace.h
new file mode 100644
index 0000000..0daef95
--- /dev/null
+++ b/include/nucleus/fpu_trace.h
@@ -0,0 +1,49 @@
+#ifndef FPU_TRACE_H
+#define FPU_TRACE_H
+
+struct task_struct;
+struct xnthread;
+
+#define FPU_TRACE_ENABLED
+
+#ifdef FPU_TRACE_ENABLED
+
+void fp_trace_switch(struct task_struct *from, struct task_struct *to);
+
+void fp_trace_kernel_start(void);
+
+void fp_trace_kernel_end(void);
+
+void fp_trace_save(void);
+
+void fp_trace_restore(void);
+
+void fp_trace_xeno_init(unsigned branch);
+
+void fp_trace_xeno_switch(struct xnthread *from, struct xnthread *to);
+
+void fp_trace_xeno_save(unsigned branch, unsigned do_save);
+
+void fp_trace_xeno_restore(unsigned branch, unsigned do_save);
+
+void fp_trace_xeno_enable(unsigned branch, unsigned do_save);
+
+void fp_trace_freeze(void);
+
+#else
+
+#define fp_trace_switch(from, to) do { } while(0)
+#define fp_trace_kernel_start() do { } while(0)
+#define fp_trace_kernel_end() do { } while (0)
+#define fp_trace_save() do { } while(0)
+#define fp_trace_restore() do { } while(0)
+#define fp_trace_xeno_init(branch) do { } while (0)
+#define fp_trace_xeno_switch(from, to) do { } while(0)
+#define fp_trace_xeno_save(branch, do_save) do { } while(0)
+#define fp_trace_xeno_restore(branch, do_save) do { } while(0)
+#define fp_trace_xeno_enable(branch, do_save) do { } while(0)
+#define fp_trace_freeze() do { } while(0)
+
+#endif
+
+#endif /* FPU_TRACE_H */
diff --git a/ksrc/arch/x86/patches/fp_tracepoints.diff 
b/ksrc/arch/x86/patches/fp_tracepoints.diff
new file mode 100644
index 0000000..e3000e0
--- /dev/null
+++ b/ksrc/arch/x86/patches/fp_tracepoints.diff
@@ -0,0 +1,107 @@
+diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
+index e636ef3..687e234 100644
+--- a/arch/x86/include/asm/i387.h
++++ b/arch/x86/include/asm/i387.h
+@@ -14,6 +14,9 @@
+ #include <linux/kernel_stat.h>
+ #include <linux/regset.h>
+ #include <linux/hardirq.h>
++
++#include <xenomai/nucleus/fpu_trace.h>
++
+ #include <asm/asm.h>
+ #include <asm/processor.h>
+ #include <asm/sigcontext.h>
+@@ -77,10 +80,13 @@ static inline int fxrstor_checking(struct 
i387_fxsave_struct *fx)
+ 
+ static inline int restore_fpu_checking(struct task_struct *tsk)
+ {
++      int ret;
+       if (task_thread_info(tsk)->status & TS_XSAVE)
+-              return xrstor_checking(&tsk->thread.xstate->xsave);
++              ret = xrstor_checking(&tsk->thread.xstate->xsave);
+       else
+-              return fxrstor_checking(&tsk->thread.xstate->fxsave);
++              ret = fxrstor_checking(&tsk->thread.xstate->fxsave);
++      fp_trace_restore();
++      return ret;
+ }
+ 
+ /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+@@ -161,6 +167,7 @@ static inline void fxsave(struct task_struct *tsk)
+ 
+ static inline void __save_init_fpu(struct task_struct *tsk)
+ {
++      fp_trace_save();
+       if (task_thread_info(tsk)->status & TS_XSAVE)
+               xsave(tsk);
+       else
+@@ -183,6 +190,7 @@ static inline void restore_fpu(struct task_struct *tsk)
+ {
+       if (task_thread_info(tsk)->status & TS_XSAVE) {
+               xrstor_checking(&tsk->thread.xstate->xsave);
++              fp_trace_restore();
+               return;
+       }
+       /*
+@@ -194,6 +202,7 @@ static inline void restore_fpu(struct task_struct *tsk)
+               "fxrstor %1",
+               X86_FEATURE_FXSR,
+               "m" (tsk->thread.xstate->fxsave));
++      fp_trace_restore();
+ }
+ 
+ /* We need a safe address that is cheap to find and that is already
+@@ -210,6 +219,7 @@ static inline void restore_fpu(struct task_struct *tsk)
+  */
+ static inline void __save_init_fpu(struct task_struct *tsk)
+ {
++      fp_trace_save();
+       if (task_thread_info(tsk)->status & TS_XSAVE) {
+               struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
+               struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
+@@ -285,6 +295,7 @@ static inline void kernel_fpu_begin(void)
+       struct thread_info *me = current_thread_info();
+       unsigned long flags;
+       preempt_disable();
++      fp_trace_kernel_start();
+       local_irq_save_hw_cond(flags);
+       if (me->status & TS_USEDFPU)
+               __save_init_fpu(me->task);
+@@ -296,6 +307,7 @@ static inline void kernel_fpu_begin(void)
+ static inline void kernel_fpu_end(void)
+ {
+       stts();
++      fp_trace_kernel_end();
+       preempt_enable();
+ }
+ 
+diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
+index 4b84f80..21f31a1 100644
+--- a/arch/x86/include/asm/system.h
++++ b/arch/x86/include/asm/system.h
+@@ -10,6 +10,8 @@
+ #include <linux/kernel.h>
+ #include <linux/irqflags.h>
+ 
++#include <xenomai/nucleus/fpu_trace.h>
++
+ /* entries in ARCH_DLINFO: */
+ #ifdef CONFIG_IA32_EMULATION
+ # define AT_VECTOR_SIZE_ARCH 2
+@@ -37,6 +39,7 @@ do {                                                         
        \
+        * __switch_to())                                               \
+        */                                                             \
+       unsigned long ebx, ecx, edx, esi, edi;                          \
++      fp_trace_switch(prev, next);                                    \
+                                                                       \
+       asm volatile("pushfl\n\t"               /* save    flags */     \
+                    "pushl %%ebp\n\t"          /* save    EBP   */     \
+@@ -88,6 +91,7 @@ do {                                                         
        \
+ 
+ /* Save restore flags to clear handle leaking NT */
+ #define switch_to(prev, next, last) \
++      fp_trace_switch(prev, next);                                    \
+       asm volatile(SAVE_CONTEXT                                               
    \
+            "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
+            "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */    
  \
diff --git a/ksrc/nucleus/Makefile b/ksrc/nucleus/Makefile
index bc39f1e..a5334d2 100644
--- a/ksrc/nucleus/Makefile
+++ b/ksrc/nucleus/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_XENO_OPT_NUCLEUS) += xeno_nucleus.o
 
 xeno_nucleus-y := \
        bufd.o heap.o intr.o pod.o registry.o \
-       synch.o thread.o timebase.o timer.o sched.o
+       synch.o thread.o timebase.o timer.o sched.o fpu_trace.o
 
 # CAUTION: the declaration order of scheduling classes is
 # significant. Lower priority classes shall be listed first.
@@ -38,7 +38,7 @@ list-multi := xeno_nucleus.o
 
 xeno_nucleus-objs := \
        bufd.o heap.o intr.o pod.o registry.o \
-       synch.o thread.o timebase.o timer.o sched.o
+       synch.o thread.o timebase.o timer.o sched.o fpu_trace.o
 
 # CAUTION: the declaration order of scheduling classes is
 # significant. Lower priority classes shall be listed first.
diff --git a/ksrc/nucleus/fpu_trace.c b/ksrc/nucleus/fpu_trace.c
new file mode 100644
index 0000000..ea1f8b2
--- /dev/null
+++ b/ksrc/nucleus/fpu_trace.c
@@ -0,0 +1,566 @@
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <nucleus/fpu_trace.h>
+#include <nucleus/thread.h>    /* For xnthread_t */
+
+#define fp_print(cookie, format, args...) \
+       seq_printf(cookie, format , ##args)
+
+#include <asm/xenomai/fp_dump.h>
+
+#ifdef FPU_TRACE_ENABLED
+
+#if 1
+#define STORAGE_SIZE (128 * 1024 * 1024)
+#else
+#define STORAGE_SIZE (16 * 1024 * 1024)
+#endif
+#define SIZE_MASK (STORAGE_SIZE - 1)
+
+enum fp_trace_event {
+       FP_TRACE_LTHR = 0,
+       FP_TRACE_LKFPBEG,
+       FP_TRACE_LKFPEND,
+       FP_TRACE_LFPS,
+       FP_TRACE_LFPR,
+       FP_TRACE_XINI,
+       FP_TRACE_XTHR,
+       FP_TRACE_XFPS,
+       FP_TRACE_XFPR,
+       FP_TRACE_XFPEN,
+       FP_TRACE_STOP,
+       FP_TRACE_MAX,
+};
+
+struct fp_trace_type {
+       unsigned short type;
+       unsigned short branch;
+};
+
+struct fp_trace_switch {
+       char from[64];
+       char to[64];
+};
+
+static unsigned event_hit[FP_TRACE_MAX];
+
+static unsigned points, starting, frozen, ready;
+static unsigned long begin, end;
+static char *storage;
+static IPIPE_DEFINE_SPINLOCK(lock);
+
+static void fp_trace_push(enum fp_trace_event type,
+                         unsigned branch, void *buf, unsigned len);
+
+static unsigned
+fp_trace_print_task(char *buf, size_t size,
+                   xnthread_t *thread, struct task_struct *user);
+
+void fp_trace_switch(struct task_struct *from, struct task_struct *to)
+{
+       struct fp_trace_switch buf;
+
+       fp_trace_print_task(&buf.from[0], sizeof(buf.from), NULL, from);
+       fp_trace_print_task(&buf.to[0], sizeof(buf.to), NULL, to);
+       fp_trace_push(FP_TRACE_LTHR, 0, &buf, sizeof(buf));
+}
+
+void fp_trace_kernel_start(void)
+{
+       fp_trace_push(FP_TRACE_LKFPBEG, 0, NULL, 0);
+}
+
+void fp_trace_kernel_end(void)
+{
+       fp_trace_push(FP_TRACE_LKFPEND, 0, NULL, 0);
+}
+
+void fp_trace_save(void)
+{
+       DECLARE_FPENV(ctxt);
+       fpenv_save(ctxt);
+       fp_trace_push(FP_TRACE_LFPS, 0, ctxt, sizeof(*ctxt));
+}
+
+void fp_trace_restore(void)
+{
+       DECLARE_FPENV(ctxt);
+       fpenv_save(ctxt);
+       fp_trace_push(FP_TRACE_LFPR, 0, ctxt, sizeof(*ctxt));
+}
+
+void fp_trace_xeno_init(unsigned branch)
+{
+       fp_trace_push(FP_TRACE_XINI, branch, NULL, 0);
+}
+
+void fp_trace_xeno_switch(xnthread_t *from, xnthread_t *to)
+{
+       struct fp_trace_switch buf;
+
+       fp_trace_print_task(&buf.from[0], sizeof(buf.from), from, NULL);
+       fp_trace_print_task(&buf.to[0], sizeof(buf.to), to, NULL);
+       fp_trace_push(FP_TRACE_XTHR, 0, &buf, sizeof(buf));
+}
+
+void fp_trace_xeno_save(unsigned branch, unsigned do_save)
+{
+       if (do_save) {
+               DECLARE_FPENV(ctxt);
+               fpenv_save(ctxt);
+               fp_trace_push(FP_TRACE_XFPS,
+                             branch | (1 << 15), ctxt, sizeof(*ctxt));
+       } else
+               fp_trace_push(FP_TRACE_XFPS, branch, NULL, 0);
+}
+
+void fp_trace_xeno_restore(unsigned branch, unsigned do_save)
+{
+       if (do_save) {
+               DECLARE_FPENV(ctxt);
+               fpenv_save(ctxt);
+               fp_trace_push(FP_TRACE_XFPR,
+                             branch | (1 << 15), ctxt, sizeof(*ctxt));
+       } else
+               fp_trace_push(FP_TRACE_XFPR, branch, NULL, 0);
+}
+
+void fp_trace_xeno_enable(unsigned branch, unsigned do_save)
+{
+       if (do_save) {
+               DECLARE_FPENV(ctxt);
+               fpenv_save(ctxt);
+               fp_trace_push(FP_TRACE_XFPEN,
+                             branch | (1 << 15), ctxt, sizeof(*ctxt));
+       } else
+               fp_trace_push(FP_TRACE_XFPEN, branch, NULL, 0);
+}
+
+void fp_trace_freeze(void)
+{
+       fp_trace_push(FP_TRACE_STOP, 0, NULL, 0);
+       frozen = 1;
+}
+
+static unsigned long fp_trace_next_entry(unsigned long cur)
+{
+       struct fp_trace_type *type =
+               (struct fp_trace_type *) (storage + cur);
+       if (cur % 4) {
+               spin_unlock_irq(&lock);
+               rthal_emergency_console();
+               printk("Entry not aligned, type: %d, entry: %lx,"
+                      " end: %lx\n", type->type, cur, end);
+               BUG();
+       }
+       cur += sizeof(*type);
+       switch(type->type) {
+       case FP_TRACE_LTHR:
+       case FP_TRACE_XTHR:
+               cur += sizeof(struct fp_trace_switch);
+               break;
+       case FP_TRACE_LFPS:
+       case FP_TRACE_LFPR:
+               cur += sizeof(fpenv_t);
+               break;
+       case FP_TRACE_LKFPBEG:
+       case FP_TRACE_LKFPEND:
+       case FP_TRACE_XINI:
+       case FP_TRACE_XFPS:
+       case FP_TRACE_XFPR:
+       case FP_TRACE_XFPEN:
+       case FP_TRACE_STOP:
+               if (type->branch & (1 << 15))
+                       cur += sizeof(fpenv_t);
+               break;
+       default:
+               spin_unlock_irq(&lock);
+               printk("Invalid type, type: %d, entry: %lx,"
+                      " end: %lx\n", type->type, cur, end);
+               BUG();
+       }
+       return cur & SIZE_MASK;
+}
+
+static void fp_trace_push(enum fp_trace_event type,
+                         unsigned branch, void *buf, unsigned len)
+{
+       struct fp_trace_type *t;
+       unsigned long flags;
+
+       spin_lock_irqsave(&lock, flags);
+       if (frozen || !ready)
+               goto unlock;
+
+       t = (struct fp_trace_type *) (storage + end);
+       if (unlikely(starting))
+               starting = 0;
+       else
+               while (((begin - end) & SIZE_MASK) < sizeof(*t) + len) {
+                       begin = fp_trace_next_entry(begin);
+                       --points;
+               }
+
+       event_hit[type] |= (1 << (branch & ~(1U << 15)));
+       if (type >= FP_TRACE_MAX) {
+               spin_unlock_irqrestore(&lock, flags);
+               BUG();
+       }
+       t->type = type;
+       t->branch = branch;
+       end = (end + sizeof(*t)) & SIZE_MASK;
+       if (!len)
+               goto inc_points;
+       if (end + len <= STORAGE_SIZE)
+               memcpy(storage + end, buf, len);
+       else {
+               unsigned cut = STORAGE_SIZE - end;
+               memcpy(storage + end, buf, cut);
+               memcpy(storage, buf + cut, len - cut);
+       }
+       end = (end + len) & SIZE_MASK;
+       if (fp_trace_next_entry((char *) t - storage) != end) {
+               spin_unlock_irqrestore(&lock, flags);
+               printk("Trace, type: %d, entry: %lx, len: %x, next: %lx,"
+                       " end: %lx\n", t->type, (char *) t - storage, len,
+                       fp_trace_next_entry((char *) t - storage), end);
+               BUG();
+       }
+  inc_points:
+       ++points;
+  unlock:
+       spin_unlock_irqrestore(&lock, flags);
+}
+
+static unsigned
+fp_trace_print_task(char *buf, size_t size,
+                   xnthread_t *thread, struct task_struct *user)
+{
+       if (!user)
+               user = xnthread_user_task(thread);
+
+       if (!user)
+               return snprintf(buf, size, "RTK: %s(%p)",
+                               xnthread_name(thread), thread);
+
+       if (thread && !xnthread_test_state(thread, XNROOT))
+               return snprintf(buf, size, "RTUP: %s[%d]",
+                               xnthread_name(thread), user->pid);
+
+       thread = xnshadow_thread(user);
+       if (thread)
+               return snprintf(buf, size, "RTUS: %s[%d]",
+                               xnthread_name(thread), user->pid);
+
+       return snprintf(buf, size, "NRT: %s[%d]", user->comm, user->pid);
+}
+
+static void *fp_trace_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       unsigned i;
+
+       if (!frozen || *pos >= points)
+               return NULL;
+
+       for (i = 0, seq->private = (void *) begin; i < *pos; i++)
+               seq->private =
+                       (void *) fp_trace_next_entry((u_long) seq->private);
+
+       return storage + (u_long) seq->private;
+}
+
+static void *fp_trace_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       if(++*pos >= points)
+               return NULL;
+
+       seq->private =
+               (void *) fp_trace_next_entry((u_long) seq->private);
+
+       return storage + (u_long) seq->private;
+}
+
+static void fp_trace_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static void cpy_buf(void *dest, unsigned len, void *src)
+{
+       unsigned long cur = ((char *) src - storage) & SIZE_MASK;
+
+       if (cur + len <= STORAGE_SIZE)
+               memcpy(dest, src, len);
+       else {
+               unsigned long cut = STORAGE_SIZE - cur;
+               memcpy(dest, src, cut);
+               memcpy(dest + cut, storage, len - cut);
+       }
+}
+
+static int fp_trace_seq_show(struct seq_file *seq, void *v)
+{
+       struct fp_trace_type *type = v;
+       struct fp_trace_switch swtch;
+       DECLARE_FPENV(ctxt);
+
+       switch(type->type) {
+       case FP_TRACE_LTHR:
+               cpy_buf(&swtch, sizeof(swtch), type + 1);
+               seq_printf(seq, "Linux switch from %s to %s\n",
+                          swtch.from, swtch.to);
+               break;
+
+       case FP_TRACE_LKFPBEG:
+               seq_printf(seq, "Linux, starting use of FPU in kernel-space\n");
+               break;
+
+       case FP_TRACE_LKFPEND:
+               seq_printf(seq, "Linux, ending use of FPU in kernel-space\n");
+               break;
+
+       case FP_TRACE_LFPS:
+               seq_printf(seq, "Linux saving fpu\n");
+               cpy_buf(ctxt, sizeof(*ctxt), type + 1);
+               fpenv_dump(seq, ctxt);
+               break;
+
+       case FP_TRACE_LFPR:
+               seq_printf(seq, "Linux restoring fpu\n");
+               cpy_buf(ctxt, sizeof(*ctxt), type + 1);
+               fpenv_dump(seq, ctxt);
+               break;
+
+       case FP_TRACE_XINI:
+               seq_printf(seq, "Xeno init fpu (branch %d)\n", type->branch);
+               break;
+
+       case FP_TRACE_XTHR:
+               cpy_buf(&swtch, sizeof(swtch), type + 1);
+               seq_printf(seq, "Xeno switch from %s to %s\n",
+                          swtch.from, swtch.to);
+               break;
+
+       case FP_TRACE_XFPS:
+               seq_printf(seq, "Xeno saving fpu (branch %d)\n",
+                          type->branch & ~(1 << 15));
+               if (type->branch & (1 << 15)) {
+                       cpy_buf(ctxt, sizeof(*ctxt), type + 1);
+                       fpenv_dump(seq, ctxt);
+               }
+               break;
+
+       case FP_TRACE_XFPR:
+               seq_printf(seq, "Xeno restoring fpu (branch %d)\n",
+                          type->branch & ~(1 << 15));
+               if (type->branch & (1 << 15)) {
+                       cpy_buf(ctxt, sizeof(*ctxt), type + 1);
+                       fpenv_dump(seq, ctxt);
+               }
+               break;
+
+       case FP_TRACE_XFPEN:
+               seq_printf(seq, "Xeno enabling fpu (branch %d)\n",
+                          type->branch & ~(1 << 15));
+               if (type->branch & (1 << 15)) {
+                       cpy_buf(ctxt, sizeof(*ctxt), type + 1);
+                       fpenv_dump(seq, ctxt);
+               }
+               break;
+
+       case FP_TRACE_STOP:
+               seq_printf(seq, "Capture stopped\n");
+               if (type->branch & (1 << 15)) {
+                       cpy_buf(ctxt, sizeof(*ctxt), type + 1);
+                       fpenv_dump(seq, ctxt);
+               }
+               break;
+
+       default:
+               printk("type: 0x%04x, cur: 0x%08lx\n",
+                      type->type, (char *) type - storage);
+               BUG();
+       }
+
+       return 0;
+}
+
+static struct seq_operations fp_trace_seq_op = {
+       .start = &fp_trace_seq_start,
+       .next = &fp_trace_seq_next,
+       .stop = &fp_trace_seq_stop,
+       .show = &fp_trace_seq_show
+};
+
+static int fp_trace_seq_open(struct inode *inode, struct file *file)
+{
+       if (!frozen || !ready)
+               return -ENOENT;
+
+       return seq_open(file, &fp_trace_seq_op);
+}
+
+static struct file_operations fp_trace_seq_operations = {
+       .owner = THIS_MODULE,
+       .open = fp_trace_seq_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+static void *fp_trace_cov_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       if (*pos >= FP_TRACE_MAX)
+               return NULL;
+
+       seq->private = (void *) (long) *pos;
+
+       return &event_hit[*pos];
+}
+
+static void *fp_trace_cov_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       if(++*pos >= FP_TRACE_MAX)
+               return NULL;
+
+       seq->private = (void *) (long) *pos;
+
+       return &event_hit[*pos];
+}
+
+static void fp_trace_cov_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int fp_trace_cov_seq_show(struct seq_file *seq, void *v)
+{
+       unsigned event = (unsigned *) v - event_hit;
+       unsigned this_event_hits = *(unsigned *) v;
+       unsigned i, hit = 0;
+
+       switch(event) {
+       case FP_TRACE_LTHR:
+               seq_printf(seq, "Linux switch: ");
+               break;
+
+       case FP_TRACE_LKFPBEG:
+               seq_printf(seq, "Linux, starting use of FPU in kernel-space: ");
+               break;
+
+       case FP_TRACE_LKFPEND:
+               seq_printf(seq, "Linux, ending use of FPU in kernel-space: ");
+               break;
+
+       case FP_TRACE_LFPS:
+               seq_printf(seq, "Linux saving fpu: ");
+               break;
+
+       case FP_TRACE_LFPR:
+               seq_printf(seq, "Linux restoring fpu: ");
+               break;
+
+       case FP_TRACE_XINI:
+               seq_printf(seq, "Xeno init fpu: ");
+               break;
+
+       case FP_TRACE_XTHR:
+               seq_printf(seq, "Xeno switch: ");
+               break;
+
+       case FP_TRACE_XFPS:
+               seq_printf(seq, "Xeno saving fpu: ");
+               break;
+
+       case FP_TRACE_XFPR:
+               seq_printf(seq, "Xeno restoring fpu: ");
+               break;
+
+       case FP_TRACE_XFPEN:
+               seq_printf(seq, "Xeno enabling fpu: ");
+               break;
+
+       case FP_TRACE_STOP:
+               seq_printf(seq, "Capture stopped: ");
+               break;
+
+       default:
+               printk("type: 0x%04x\n", event);
+               BUG();
+       }
+
+       for (i = 0; i < 15; i++)
+               if ((this_event_hits & (1 << i))) {
+                       if (!hit) {
+                               seq_printf(seq, "hit ");
+                               hit = 1;
+                       }
+                       seq_printf(seq, "branch %d, ", i);
+               }
+       if (!hit)
+               seq_printf(seq, "no hit\n");
+       else
+               seq_printf(seq, "\n");
+
+       return 0;
+}
+
+static struct seq_operations fp_trace_cov_seq_op = {
+       .start = &fp_trace_cov_seq_start,
+       .next = &fp_trace_cov_seq_next,
+       .stop = &fp_trace_cov_seq_stop,
+       .show = &fp_trace_cov_seq_show
+};
+
+static int fp_trace_cov_seq_open(struct inode *inode, struct file *file)
+{
+       if (!ready)
+               return -ENOENT;
+
+       return seq_open(file, &fp_trace_cov_seq_op);
+}
+
+static struct file_operations fp_trace_cov_seq_operations = {
+       .owner = THIS_MODULE,
+       .open = fp_trace_cov_seq_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+extern struct proc_dir_entry *rthal_proc_root;
+
+int __init fp_trace_init(void)
+{
+       struct proc_dir_entry *entry;
+
+       if (STORAGE_SIZE & SIZE_MASK) {
+               printk("FPU tracer: storage size is not a power of 2\n");
+               return -EINVAL;
+       }
+
+       storage = vmalloc(STORAGE_SIZE);
+       if (!storage)
+               return -ENOMEM;
+
+       entry = create_proc_entry("fp_trace_dump", 0, rthal_proc_root);
+       if (!entry)
+               return -EINVAL;
+
+       entry->proc_fops = &fp_trace_seq_operations;
+       entry->owner = THIS_MODULE;
+
+       entry = create_proc_entry("fp_trace_cov", 0, rthal_proc_root);
+       if (!entry)
+               return -EINVAL;
+
+       entry->proc_fops = &fp_trace_cov_seq_operations;
+       entry->owner = THIS_MODULE;
+
+       points = begin = end = 0;
+       starting = ready = 1;
+       return 0;
+}
+
+module_init(fp_trace_init);
+
+#endif
diff --git a/ksrc/nucleus/pod.c b/ksrc/nucleus/pod.c
index 21763d5..76e1fe1 100644
--- a/ksrc/nucleus/pod.c
+++ b/ksrc/nucleus/pod.c
@@ -43,6 +43,7 @@
 #include <nucleus/module.h>
 #include <nucleus/stat.h>
 #include <nucleus/assert.h>
+#include <nucleus/fpu_trace.h>
 #include <asm/xenomai/bits/pod.h>
 
 #ifndef CONFIG_XENO_OPT_DEBUG_NUCLEUS
@@ -2036,6 +2037,7 @@ EXPORT_SYMBOL_GPL(xnpod_welcome_thread);
 static inline void xnpod_switch_to(xnsched_t *sched,
                                   xnthread_t *prev, xnthread_t *next)
 {
+       fp_trace_xeno_switch(prev, next);
 #ifdef CONFIG_XENO_HW_UNLOCKED_SWITCH
        sched->last = prev;
        __setbits(sched->status, XNSWLOCK);
diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c
index ee59923..2b51c99 100644
--- a/ksrc/nucleus/shadow.c
+++ b/ksrc/nucleus/shadow.c
@@ -50,6 +50,9 @@
 #include <nucleus/trace.h>
 #include <nucleus/stat.h>
 #include <nucleus/sys_ppd.h>
+
+#include <nucleus/fpu_trace.h>
+
 #include <asm/xenomai/features.h>
 #include <asm/xenomai/syscall.h>
 #include <asm/xenomai/bits/shadow.h>
@@ -1769,6 +1772,7 @@ static int xnshadow_sys_trace(struct pt_regs *regs)
                break;
 
        case __xntrace_op_user_freeze:
+               fp_trace_freeze();
                err = xnarch_trace_user_freeze(__xn_reg_arg2(regs),
                                               __xn_reg_arg3(regs));
                break;
diff --git a/src/testsuite/switchtest/switchtest.c 
b/src/testsuite/switchtest/switchtest.c
index 66d0eab..e8884e2 100644
--- a/src/testsuite/switchtest/switchtest.c
+++ b/src/testsuite/switchtest/switchtest.c
@@ -1415,6 +1415,8 @@ int main(int argc, const char *argv[])
 
        /* Allow a second Ctrl-C in case of lockup. */
        pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
+       if (freeze_on_error)
+               xntrace_user_freeze(0, 0);
 
        /* Cleanup. */
   cleanup:


_______________________________________________
Xenomai-git mailing list
Xenomai-git@gna.org
https://mail.gna.org/listinfo/xenomai-git

Reply via email to