Philippe Gerum wrote:
> Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> ...
>>> It seems that the pipeline log is not synced by
>>> __ipipe_unstall_iret_root.
>>> We need to know why. Question: is the root stage stalled or unstalled
>>> by this routine during the latest call before the box freezes?
>>
>>
>> I'm currently switching my brain between too many tasks: Could you simply
>> tell me what variable to check so that I can hack some
>> ipipe_trace_special into the kernel?
> 
> The value of the IPIPE_STALL_FLAG for the root domain upon exit from
> __ipipe_unstall_iret_root.
> 

The problem seems to be the stalled Xenomai domain:

>   fn                 1917    3.503  cond_resched+0x9 (console_conditional_schedule+0x16)
>  |fn                 1921    2.706  __ipipe_handle_irq+0xe (common_interrupt+0x18)
>  |fn                 1923    1.548  __ipipe_ack_common_irq+0x9 (__ipipe_handle_irq+0xc0)
>  |fn                 1925    4.390  mask_and_ack_8259A+0xb (__ipipe_ack_common_irq+0x47)
>  |(0x20) 0x00000000  1929    0.796  __ipipe_handle_irq+0x144 (common_interrupt+0x18)
>  |(0x30) 0x00000064  1930    0.766  __ipipe_handle_irq+0x15c (common_interrupt+0x18)
>  |(0x31) 0x00000064  1931    0.812  __ipipe_handle_irq+0x169 (common_interrupt+0x18)
>  |(0x32) 0x000000c8  1932    0.766  __ipipe_handle_irq+0x17e (common_interrupt+0x18)
>  |(0x32) 0x00000001  1932    0.781  __ipipe_handle_irq+0x188 (common_interrupt+0x18)
>  |(0x21) 0x00000000  1933    1.383  __ipipe_handle_irq+0x208 (common_interrupt+0x18)
>  |fn                 1934    1.413  __ipipe_stall_root+0x8 (resume_kernel+0x5)
>   fn                 1936    1.052  __ipipe_unstall_iret_root+0x8 (restore_raw+0x0)
>  |(0x11) 0x00000000  1937    0.932  __ipipe_unstall_iret_root+0x31 (restore_raw+0x0)
>  |(0x03) 0x00000000  1938    1.774  __ipipe_unstall_iret_root+0x64 (restore_raw+0x0)
>   fn                 1940    0.736  console_conditional_schedule+0x8 (fbcon_redraw+0xdf)

This was taken during the failing Linux timer tick with the attached
instrumentation hack.

BTW, that trace hacking reminds me that we should really think about
getting a kernel debugger to run. I recently noticed that the latest kgdb
applies with only a single failing hunk on top of ipipe (2.6.15, x86). Maybe
it is just a matter of making kgdb's irq-locks ipipe-aware, bypassing the
ipipe for int3 and the serial IRQ (so that ipipe can be debugged as
well), and catching the relevant exceptions. Hmm, the debugger seems to
get initialised in the "early" stage. Is this before or after ipipe setup?

Jan
--- arch/i386/kernel/ipipe-root.c.orig  2006-04-05 23:13:45.000000000 +0200
+++ arch/i386/kernel/ipipe-root.c       2006-04-07 14:35:30.000000000 +0200
@@ -315,11 +315,13 @@ asmlinkage void __ipipe_unstall_iret_roo
           emulation. */
 
        if (!(regs.eflags & X86_EFLAGS_IF)) {
+ipipe_trace_special(0x10, 0);
                __set_bit(IPIPE_STALL_FLAG,
                          &ipipe_root_domain->cpudata[cpuid].status);
                ipipe_mark_domain_stall(ipipe_root_domain, cpuid);
                regs.eflags |= X86_EFLAGS_IF;
        } else {
+ipipe_trace_special(0x11, 0);
                __clear_bit(IPIPE_STALL_FLAG,
                            &ipipe_root_domain->cpudata[cpuid].status);
 
@@ -335,6 +337,7 @@ asmlinkage void __ipipe_unstall_iret_roo
 #ifdef CONFIG_IPIPE_TRACE_IRQSOFF
        ipipe_trace_end(0x8000000D);
 #endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+ipipe_trace_special(0x03, ipipe_root_domain->cpudata[cpuid].status);
 }
 
 asmlinkage int __ipipe_syscall_root(struct pt_regs regs)
@@ -457,20 +460,26 @@ fastcall int __ipipe_divert_exception(st
 static inline void __ipipe_walk_pipeline(struct list_head *pos, int cpuid)
 {
        struct ipipe_domain *this_domain = ipipe_percpu_domain[cpuid];
+ipipe_trace_special(0x30, ipipe_root_domain->priority);
+ipipe_trace_special(0x31, this_domain->priority);
 
        while (pos != &__ipipe_pipeline) {
                struct ipipe_domain *next_domain =
                    list_entry(pos, struct ipipe_domain, p_link);
+ipipe_trace_special(0x32, next_domain->priority);
+ipipe_trace_special(0x32, next_domain->cpudata[cpuid].status);
 
                if (test_bit
                    (IPIPE_STALL_FLAG, &next_domain->cpudata[cpuid].status))
                        break;  /* Stalled stage -- do not go further. */
 
+ipipe_trace_special(0x34, 0);
                if (next_domain->cpudata[cpuid].irq_pending_hi != 0) {
 
                        if (next_domain == this_domain)
                                __ipipe_sync_stage(IPIPE_IRQMASK_ANY);
                        else {
+ipipe_trace_special(0x35, 0);
                                __ipipe_switch_to(this_domain, next_domain,
                                                  cpuid);
 
@@ -483,6 +492,7 @@ static inline void __ipipe_walk_pipeline
                                        __ipipe_sync_stage(IPIPE_IRQMASK_ANY);
                        }
 
+ipipe_trace_special(0x36, 0);
                        break;
                } else if (next_domain == this_domain)
                        break;
@@ -587,7 +597,9 @@ int __ipipe_handle_irq(struct pt_regs re
           marked as 'sticky'. This search does not go beyond the
           current domain in the pipeline. */
 
+ipipe_trace_special(0x20, 0);
        __ipipe_walk_pipeline(head, cpuid);
+ipipe_trace_special(0x21, 0);
 
        ipipe_load_cpuid();
 

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
Xenomai-core mailing list
Xenomai-core@gna.org
https://mail.gna.org/listinfo/xenomai-core

Reply via email to